Diffstat (limited to 'drivers/infiniband/hw')
142 files changed, 5343 insertions, 4775 deletions
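Much of the memory-registration churn below, in bnxt_re, cxgb4 and efa alike, replaces open-coded scatterlist walks over umem->sg_head.sgl / umem->nmap with the block-based umem iterator and ib_umem_num_dma_blocks(). A minimal sketch of the new pattern, using a hypothetical demo_fill_pbl() helper in place of the drivers' real PBL-fill routines:

#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/*
 * demo_fill_pbl() is a hypothetical helper, not a function from this series.
 * It fills a page-buffer list with the DMA address of every pg_sz-aligned
 * block backing a user MR, the way the converted drivers now do it instead
 * of iterating the scatterlist directly.
 */
static int demo_fill_pbl(struct ib_umem *umem, u64 *pbl, unsigned long pg_sz)
{
	struct ib_block_iter biter;
	int i = 0;

	/* One iteration per pg_sz-sized DMA block of the umem. */
	rdma_umem_for_each_dma_block(umem, &biter, pg_sz)
		pbl[i++] = rdma_block_iter_dma_address(&biter);

	return i; /* matches ib_umem_num_dma_blocks(umem, pg_sz) */
}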
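The bnxt_re queue code also moves its interrupt bottom halves from tasklet_init() with an unsigned long cookie to tasklet_setup() plus from_tasklet(). A minimal sketch of that conversion, with hypothetical demo_* names standing in for the driver's NQ/CREQ structures:

#include <linux/interrupt.h>

/* Hypothetical context structure embedding its tasklet (illustrative only). */
struct demo_nq {
	struct tasklet_struct nq_tasklet;
	/* ... notification-queue state ... */
};

/* New-style callback: takes the tasklet itself and recovers the parent with from_tasklet(). */
static void demo_service_nq(struct tasklet_struct *t)
{
	struct demo_nq *nq = from_tasklet(nq, t, nq_tasklet);

	/* ... process completions on nq ... */
}

static void demo_start(struct demo_nq *nq)
{
	/* Replaces tasklet_init(&nq->nq_tasklet, fn, (unsigned long)nq). */
	tasklet_setup(&nq->nq_tasklet, demo_service_nq);
	tasklet_schedule(&nq->nq_tasklet);
}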
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index a300588634c5..b930ea3dab7a 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -150,7 +150,7 @@ struct bnxt_re_dev { struct delayed_work worker; u8 cur_prio_map; - u8 active_speed; + u16 active_speed; u8 active_width; /* FP Notification Queue (CQ & SRQ) */ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1d7a9ca5240c..401bdc9e931e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -532,7 +532,7 @@ fail: } /* Protection Domains */ -void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) +int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; @@ -542,6 +542,7 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) if (pd->qplib_pd.id) bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd); + return 0; } int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -601,13 +602,14 @@ fail: } /* Address Handles */ -void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, !(flags & RDMA_DESTROY_AH_SLEEPABLE)); + return 0; } static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) @@ -938,9 +940,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, return PTR_ERR(umem); qp->sumem = umem; - qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl; - qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem); - qplib_qp->sq.sg_info.nmap = umem->nmap; + qplib_qp->sq.sg_info.umem = umem; qplib_qp->sq.sg_info.pgsize = PAGE_SIZE; qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT; qplib_qp->qp_handle = ureq.qp_handle; @@ -953,9 +953,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; - qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl; - qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem); - qplib_qp->rq.sg_info.nmap = umem->nmap; + qplib_qp->rq.sg_info.umem = umem; qplib_qp->rq.sg_info.pgsize = PAGE_SIZE; qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT; } @@ -1273,10 +1271,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ - if (init_attr->create_flags) + if (init_attr->create_flags) { ibdev_dbg(&rdev->ibdev, "QP create flags 0x%x not supported", init_attr->create_flags); + return -EOPNOTSUPP; + } /* Setup CQs */ if (init_attr->send_cq) { @@ -1568,7 +1568,7 @@ static enum ib_mtu __to_ib_mtu(u32 mtu) } /* Shared Receive Queues */ -void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) +int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -1583,6 +1583,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) atomic_dec(&rdev->srq_count); if (nq) nq->budget--; + return 0; } static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, @@ -1608,9 +1609,7 @@ static int 
bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return PTR_ERR(umem); srq->umem = umem; - qplib_srq->sg_info.sghead = umem->sg_head.sgl; - qplib_srq->sg_info.npages = ib_umem_num_pages(umem); - qplib_srq->sg_info.nmap = umem->nmap; + qplib_srq->sg_info.umem = umem; qplib_srq->sg_info.pgsize = PAGE_SIZE; qplib_srq->sg_info.pgshft = PAGE_SHIFT; qplib_srq->srq_handle = ureq.srq_handle; @@ -1660,8 +1659,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; - srq->qplib_srq.wqe_size = - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); + /* 128 byte wqe size for SRQ . So use max sges */ + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -1832,6 +1831,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, unsigned int flags; u8 nw_type; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp->qplib_qp.modify_flags = 0; if (qp_attr_mask & IB_QP_STATE) { curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); @@ -2081,6 +2083,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; @@ -2800,7 +2803,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, } /* Completion Queues */ -void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct bnxt_re_cq *cq; struct bnxt_qplib_nq *nq; @@ -2816,6 +2819,7 @@ void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) atomic_dec(&rdev->cq_count); nq->budget--; kfree(cq->cql); + return 0; } int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, @@ -2829,6 +2833,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + if (attr->flags) + return -EOPNOTSUPP; + /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded"); @@ -2860,9 +2867,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = PTR_ERR(cq->umem); goto fail; } - cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl; - cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem); - cq->qplib_cq.sg_info.nmap = cq->umem->nmap; + cq->qplib_cq.sg_info.umem = cq->umem; cq->qplib_cq.dpi = &uctx->dpi; } else { cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL); @@ -3774,23 +3779,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int bnxt_re_page_size_ok(int page_shift) -{ - switch (page_shift) { - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G: - return 1; - default: - return 0; - } -} - static 
int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, int page_shift) { @@ -3798,7 +3786,7 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, u64 page_size = BIT_ULL(page_shift); struct ib_block_iter biter; - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size) + rdma_umem_for_each_dma_block(umem, &biter, page_size) *pbl_tbl++ = rdma_block_iter_dma_address(&biter); return pbl_tbl - pbl_tbl_orig; @@ -3814,7 +3802,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_mr *mr; struct ib_umem *umem; u64 *pbl_tbl = NULL; - int umem_pgs, page_shift, rc; + unsigned long page_size; + int umem_pgs, rc; if (length > BNXT_RE_MAX_MR_SIZE) { ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n", @@ -3848,42 +3837,34 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->ib_umem = umem; mr->qplib_mr.va = virt_addr; - umem_pgs = ib_umem_page_count(umem); - if (!umem_pgs) { - ibdev_err(&rdev->ibdev, "umem is invalid!"); - rc = -EINVAL; - goto free_umem; - } - mr->qplib_mr.total_size = length; - - pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL); - if (!pbl_tbl) { - rc = -ENOMEM; - goto free_umem; - } - - page_shift = __ffs(ib_umem_find_best_pgsz(umem, - BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, - virt_addr)); - - if (!bnxt_re_page_size_ok(page_shift)) { + page_size = ib_umem_find_best_pgsz( + umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr); + if (!page_size) { ibdev_err(&rdev->ibdev, "umem page size unsupported!"); rc = -EFAULT; - goto fail; + goto free_umem; } + mr->qplib_mr.total_size = length; - if (page_shift == BNXT_RE_PAGE_SHIFT_4K && + if (page_size == BNXT_RE_PAGE_SIZE_4K && length > BNXT_RE_MAX_MR_SIZE_LOW) { ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); rc = -EINVAL; - goto fail; + goto free_umem; + } + + umem_pgs = ib_umem_num_dma_blocks(umem, page_size); + pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL); + if (!pbl_tbl) { + rc = -ENOMEM; + goto free_umem; } /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift); + umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, 1 << page_shift); + umem_pgs, false, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 1daeb30e06fd..9a8130b79256 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -163,12 +163,12 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); int bnxt_re_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -176,7 +176,7 @@ 
int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, @@ -193,7 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 53aee5a42ab8..fdb8c2478258 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -608,7 +608,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -618,7 +618,7 @@ static ssize_t hca_type_show(struct device *device, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); + return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); @@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .create_user_ah = bnxt_re_create_ah, .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, @@ -701,42 +702,14 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; - /* User space */ - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << 
IB_USER_VERBS_CMD_DESTROY_AH); - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - - rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); if (ret) return ret; - return ib_register_device(ibdev, "bnxt_re%d"); + dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX); + return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); } static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f78da54a0bc5..995d4633b0a1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -295,9 +295,9 @@ static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events) } } -static void bnxt_qplib_service_nq(unsigned long data) +static void bnxt_qplib_service_nq(struct tasklet_struct *t) { - struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; + struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet); struct bnxt_qplib_hwq *hwq = &nq->hwq; int num_srqne_processed = 0; int num_cqne_processed = 0; @@ -448,8 +448,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->msix_vec = msix_vector; if (need_init) - tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq, - (unsigned long)nq); + tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq); else tasklet_enable(&nq->nq_tasklet); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index f7736e34ac64..441eb421e5e5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -50,7 +50,7 @@ #include "qplib_sp.h" #include "qplib_fp.h" -static void bnxt_qplib_service_creq(unsigned long data); +static void bnxt_qplib_service_creq(struct tasklet_struct *t); /* Hardware communication channel */ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) @@ -79,7 +79,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) goto done; do { mdelay(1); /* 1m sec */ - bnxt_qplib_service_creq((unsigned long)rcfw); + bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); done: return count ? 
0 : -ETIMEDOUT; @@ -370,9 +370,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, } /* SP - CREQ Completion handlers */ -static void bnxt_qplib_service_creq(unsigned long data) +static void bnxt_qplib_service_creq(struct tasklet_struct *t) { - struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data; + struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet); struct bnxt_qplib_creq_ctx *creq = &rcfw->creq; u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; @@ -687,8 +687,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, creq->msix_vec = msix_vector; if (need_init) - tasklet_init(&creq->creq_tasklet, - bnxt_qplib_service_creq, (unsigned long)rcfw); + tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq); else tasklet_enable(&creq->creq_tasklet); rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 7efa6e5dce62..fa7878336100 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -45,6 +45,9 @@ #include <linux/dma-mapping.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> + #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -87,12 +90,11 @@ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl, static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl, struct bnxt_qplib_sg_info *sginfo) { - struct scatterlist *sghead = sginfo->sghead; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; int i = 0; - for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) { - pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); + rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) { + pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter); pbl->pg_arr[i] = NULL; pbl->pg_count++; i++; @@ -104,15 +106,16 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_sg_info *sginfo) { struct pci_dev *pdev = res->pdev; - struct scatterlist *sghead; bool is_umem = false; u32 pages; int i; if (sginfo->nopte) return 0; - pages = sginfo->npages; - sghead = sginfo->sghead; + if (sginfo->umem) + pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize); + else + pages = sginfo->npages; /* page ptr arrays */ pbl->pg_arr = vmalloc(pages * sizeof(void *)); if (!pbl->pg_arr) @@ -127,7 +130,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_count = 0; pbl->pg_size = sginfo->pgsize; - if (!sghead) { + if (!sginfo->umem) { for (i = 0; i < pages; i++) { pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev, pbl->pg_size, @@ -183,14 +186,12 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_sg_info sginfo = {}; u32 depth, stride, npbl, npde; dma_addr_t *src_phys_ptr, **dst_virt_ptr; - struct scatterlist *sghead = NULL; struct bnxt_qplib_res *res; struct pci_dev *pdev; int i, rc, lvl; res = hwq_attr->res; pdev = res->pdev; - sghead = hwq_attr->sginfo->sghead; pg_size = hwq_attr->sginfo->pgsize; hwq->level = PBL_LVL_MAX; @@ -204,7 +205,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, aux_pages++; } - if (!sghead) { + if (!hwq_attr->sginfo->umem) { hwq->is_user = false; npages = (depth * stride) / pg_size + aux_pages; if ((depth * stride) % pg_size) @@ -213,11 +214,14 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, return -EINVAL; 
hwq_attr->sginfo->npages = npages; } else { + unsigned long sginfo_num_pages = ib_umem_num_dma_blocks( + hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize); + hwq->is_user = true; - npages = hwq_attr->sginfo->npages; + npages = sginfo_num_pages; npages = (npages * PAGE_SIZE) / BIT_ULL(hwq_attr->sginfo->pgshft); - if ((hwq_attr->sginfo->npages * PAGE_SIZE) % + if ((sginfo_num_pages * PAGE_SIZE) % BIT_ULL(hwq_attr->sginfo->pgshft)) if (!npages) npages++; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 9da470d1e4a3..7a1ab38b95da 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -126,8 +126,7 @@ struct bnxt_qplib_pbl { }; struct bnxt_qplib_sg_info { - struct scatterlist *sghead; - u32 nmap; + struct ib_umem *umem; u32 npages; u32 pgshft; u32 pgsize; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 64d44f51db4b..6316179583a6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -118,7 +118,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * 128 WQEs needs to be reserved for the HW (8916). Prevent * reporting the max number */ - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS; + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1f288c73ccfc..8769e7aa097f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -77,9 +77,9 @@ static int enable_ecn; module_param(enable_ecn, int, 0644); MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); -static int dack_mode = 1; +static int dack_mode; module_param(dack_mode, int, 0644); -MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); uint c4iw_max_read_depth = 32; module_param(c4iw_max_read_depth, int, 0644); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 352b8af1998a..44c2416588d4 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } -void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; @@ -985,6 +985,7 @@ void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ucontext ? 
&ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); c4iw_put_wr_wait(chp->wr_waitp); + return 0; } int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, @@ -1005,6 +1006,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) + return -EOPNOTSUPP; + + if (entries < 1 || entries > ibdev->attrs.max_cqe) return -EINVAL; if (vector >= rhp->rdev.lldi.nciq) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 2b2b009b371a..f85477f3b037 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -983,23 +983,20 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); -void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs, struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 73936c3341b7..a2c71a1d93d5 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -365,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, pbl_size, pbl_addr, skb, wr_waitp); } -static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, - struct c4iw_wr_wait *wr_waitp) -{ - *stag = T4_STAG_UNSET; - return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0, NULL, wr_waitp); -} - -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, - struct sk_buff *skb, - struct c4iw_wr_wait *wr_waitp) -{ - return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0, skb, wr_waitp); -} - static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr, struct c4iw_wr_wait *wr_waitp) @@ -510,7 +494,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, __be64 *pages; int shift, n, i; int err = -ENOMEM; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -548,7 +532,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = ib_umem_num_pages(mhp->umem); + n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift); err = alloc_pbl(mhp, n); if (err) goto err_umem_release; @@ -561,8 +545,8 
@@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); + rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) { + pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter)); if (i == PAGE_SIZE / sizeof(*pages)) { err = write_pbl(&mhp->rhp->rdev, pages, mhp->attr.pbl_addr + (n << 3), i, @@ -611,84 +595,6 @@ err_free_mhp: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct c4iw_dev *rhp; - struct c4iw_pd *php; - struct c4iw_mw *mhp; - u32 mmid; - u32 stag = 0; - int ret; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - php = to_c4iw_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) { - ret = -ENOMEM; - goto free_mhp; - } - - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { - ret = -ENOMEM; - goto free_wr_wait; - } - - ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); - if (ret) - goto free_skb; - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = FW_RI_STAG_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - ret = -ENOMEM; - goto dealloc_win; - } - pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return &(mhp->ibmw); - -dealloc_win: - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); -free_skb: - kfree_skb(mhp->dereg_skb); -free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); -free_mhp: - kfree(mhp); - return ERR_PTR(ret); -} - -int c4iw_dealloc_mw(struct ib_mw *mw) -{ - struct c4iw_dev *rhp; - struct c4iw_mw *mhp; - u32 mmid; - - mhp = to_c4iw_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - xa_erase_irq(&rhp->mrs, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); - kfree_skb(mhp->dereg_skb); - c4iw_put_wr_wait(mhp->wr_waitp); - pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp); - kfree(mhp); - return 0; -} - struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 6c579d2d3997..1f1f856f8715 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) +static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -202,6 +202,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; mutex_unlock(&rhp->rdev.stats.lock); + return 0; } static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) @@ -321,8 +322,9 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%d\n", - CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); + return sysfs_emit( + buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static DEVICE_ATTR_RO(hw_rev); @@ -336,7 +338,7 
@@ static ssize_t hca_type_show(struct device *dev, pr_debug("dev 0x%p\n", dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); + return sysfs_emit(buf, "%s\n", info.driver); } static DEVICE_ATTR_RO(hca_type); @@ -347,8 +349,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, - c4iw_dev->rdev.lldi.pdev->device); + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); } static DEVICE_ATTR_RO(board_id); @@ -455,13 +457,11 @@ static const struct ib_device_ops c4iw_dev_ops = { .alloc_hw_stats = c4iw_alloc_stats, .alloc_mr = c4iw_alloc_mr, - .alloc_mw = c4iw_alloc_mw, .alloc_pd = c4iw_allocate_pd, .alloc_ucontext = c4iw_alloc_ucontext, .create_cq = c4iw_create_cq, .create_qp = c4iw_create_qp, .create_srq = c4iw_create_srq, - .dealloc_mw = c4iw_dealloc_mw, .dealloc_pd = c4iw_deallocate_pd, .dealloc_ucontext = c4iw_dealloc_ucontext, .dereg_mr = c4iw_dereg_mr, @@ -497,8 +497,10 @@ static const struct ib_device_ops c4iw_dev_ops = { .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw), + INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; @@ -530,28 +532,6 @@ void c4iw_register_device(struct work_struct *work) if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); @@ -567,7 +547,9 @@ void c4iw_register_device(struct work_struct *work) ret = set_netdevs(&dev->ibdev, &dev->rdev); if (ret) goto err_dealloc_ctx; - ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); + dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX); + ret = ib_register_device(&dev->ibdev, "cxgb4_%d", + &dev->rdev.lldi.pdev->dev); if (ret) goto err_dealloc_ctx; return; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cbddb20c6121..d109bb3822a5 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2126,7 +2126,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr 
*attrs, pr_debug("ib_pd %p\n", pd); - if (attrs->qp_type != IB_QPT_RC) + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) return ERR_PTR(-EOPNOTSUPP); php = to_c4iw_pd(pd); @@ -2374,6 +2374,9 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, pr_debug("ib_qp %p\n", ibqp); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; @@ -2471,7 +2474,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; - init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; @@ -2680,6 +2683,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, int ret; int wr_len; + if (attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + pr_debug("%s ib_pd %p\n", __func__, pd); php = to_c4iw_pd(pd); @@ -2797,7 +2803,7 @@ err_free_wr_wait: return ret; } -void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_srq *srq; @@ -2813,4 +2819,5 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) srq->wr_waitp); c4iw_free_srq_idx(&rhp->rdev, srq->idx); c4iw_put_wr_wait(srq->wr_waitp); + return 0; } diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 1889dd172a25..e5d9712e98c4 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -33,7 +33,8 @@ struct efa_irq { char name[EFA_IRQNAME_SIZE]; }; -struct efa_sw_stats { +/* Don't use anything other than atomic64 */ +struct efa_stats { atomic64_t alloc_pd_err; atomic64_t create_qp_err; atomic64_t create_cq_err; @@ -41,11 +42,6 @@ struct efa_sw_stats { atomic64_t alloc_ucontext_err; atomic64_t create_ah_err; atomic64_t mmap_err; -}; - -/* Don't use anything other than atomic64 */ -struct efa_stats { - struct efa_sw_stats sw_stats; atomic64_t keep_alive_rcvd; }; @@ -134,12 +130,12 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, @@ -156,7 +152,7 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct 
ib_udata *udata); enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 5484b08bbc5d..b199e4ac6cf9 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -61,6 +61,8 @@ enum efa_admin_qp_state { enum efa_admin_get_stats_type { EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, + EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1, + EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2, }; enum efa_admin_get_stats_scope { @@ -68,14 +70,6 @@ enum efa_admin_get_stats_scope { EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1, }; -enum efa_admin_modify_qp_mask_bits { - EFA_ADMIN_QP_STATE_BIT = 0, - EFA_ADMIN_CUR_QP_STATE_BIT = 1, - EFA_ADMIN_QKEY_BIT = 2, - EFA_ADMIN_SQ_PSN_BIT = 3, - EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4, -}; - /* * QP allocation sizes, converted by fabric QueuePair (QP) create command * from QP capabilities. @@ -199,8 +193,14 @@ struct efa_admin_modify_qp_cmd { struct efa_admin_aq_common_desc aq_common_desc; /* - * Mask indicating which fields should be updated see enum - * efa_admin_modify_qp_mask_bits + * Mask indicating which fields should be updated + * 0 : qp_state + * 1 : cur_qp_state + * 2 : qkey + * 3 : sq_psn + * 4 : sq_drained_async_notify + * 5 : rnr_retry + * 31:6 : reserved */ u32 modify_mask; @@ -222,8 +222,8 @@ struct efa_admin_modify_qp_cmd { /* Enable async notification when SQ is drained */ u8 sq_drained_async_notify; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -258,8 +258,8 @@ struct efa_admin_query_qp_resp { /* Indicates that draining is in progress */ u8 sq_draining; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -530,10 +530,36 @@ struct efa_admin_basic_stats { u64 rx_drops; }; +struct efa_admin_messages_stats { + u64 send_bytes; + + u64 send_wrs; + + u64 recv_bytes; + + u64 recv_wrs; +}; + +struct efa_admin_rdma_read_stats { + u64 read_wrs; + + u64 read_bytes; + + u64 read_wr_err; + + u64 read_resp_bytes; +}; + struct efa_admin_acq_get_stats_resp { struct efa_admin_acq_common_desc acq_common_desc; - struct efa_admin_basic_stats basic_stats; + union { + struct efa_admin_basic_stats basic_stats; + + struct efa_admin_messages_stats messages_stats; + + struct efa_admin_rdma_read_stats rdma_read_stats; + } u; }; struct efa_admin_get_set_feature_common_desc { @@ -576,7 +602,9 @@ struct efa_admin_feature_device_attr_desc { /* * 0 : rdma_read - If set, RDMA Read is supported on * TX queues - * 31:1 : reserved - MBZ + * 1 : rnr_retry - If set, RNR retry is supported on + * modify QP command + * 31:2 : reserved - MBZ */ u32 device_caps; @@ -862,6 +890,14 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) +/* modify_qp_cmd */ +#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0) +#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1) +#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4) +#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5) + /* reg_mr_cmd */ #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) @@ -878,6 +914,7 @@ struct efa_admin_host_info { /* feature_device_attr_desc */ 
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) /* host_info */ #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 6ac23627f65a..f752ef64159c 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -76,6 +76,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev, cmd.qkey = params->qkey; cmd.sq_psn = params->sq_psn; cmd.sq_drained_async_notify = params->sq_drained_async_notify; + cmd.rnr_retry = params->rnr_retry; err = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd, @@ -121,6 +122,7 @@ int efa_com_query_qp(struct efa_com_dev *edev, result->qkey = resp.qkey; result->sq_draining = resp.sq_draining; result->sq_psn = resp.sq_psn; + result->rnr_retry = resp.rnr_retry; return 0; } @@ -750,11 +752,27 @@ int efa_com_get_stats(struct efa_com_dev *edev, return err; } - result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes; - result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts; - result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes; - result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts; - result->basic_stats.rx_drops = resp.basic_stats.rx_drops; + switch (cmd.type) { + case EFA_ADMIN_GET_STATS_TYPE_BASIC: + result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes; + result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts; + result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes; + result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts; + result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops; + break; + case EFA_ADMIN_GET_STATS_TYPE_MESSAGES: + result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes; + result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs; + result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes; + result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs; + break; + case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ: + result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs; + result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes; + result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err; + result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes; + break; + } return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 190bac23f585..eea4ebfbe6ec 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -47,6 +47,7 @@ struct efa_com_modify_qp_params { u32 qkey; u32 sq_psn; u8 sq_drained_async_notify; + u8 rnr_retry; }; struct efa_com_query_qp_params { @@ -58,6 +59,7 @@ struct efa_com_query_qp_result { u32 qkey; u32 sq_draining; u32 sq_psn; + u8 rnr_retry; }; struct efa_com_destroy_qp_params { @@ -238,8 +240,24 @@ struct efa_com_basic_stats { u64 rx_drops; }; +struct efa_com_messages_stats { + u64 send_bytes; + u64 send_wrs; + u64 recv_bytes; + u64 recv_wrs; +}; + +struct efa_com_rdma_read_stats { + u64 read_wrs; + u64 read_bytes; + u64 read_wr_err; + u64 read_resp_bytes; +}; + union efa_com_get_stats_result { struct efa_com_basic_stats basic_stats; + struct efa_com_messages_stats messages_stats; + struct efa_com_rdma_read_stats rdma_read_stats; }; void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 
92d701146320..0f578734bddb 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -245,9 +245,9 @@ static const struct ib_device_ops efa_dev_ops = { .alloc_hw_stats = efa_alloc_hw_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, - .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -308,30 +308,9 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); - err = ib_register_device(&dev->ibdev, "efa_%d"); + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) goto err_release_doorbell_bar; @@ -405,20 +384,13 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) return err; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); - return err; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width)); if (err) { - dev_err(&pdev->dev, - "err_pci_set_consistent_dma_mask failed %d\n", - err); + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err); return err; } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 9e201f169289..479b604e533a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -4,6 +4,7 @@ */ #include <linux/vmalloc.h> +#include <linux/log2.h> #include <rdma/ib_addr.h> #include <rdma/ib_umem.h> @@ -35,6 +36,14 @@ struct efa_user_mmap_entry { op(EFA_RX_BYTES, "rx_bytes") \ op(EFA_RX_PKTS, "rx_pkts") \ op(EFA_RX_DROPS, "rx_drops") \ + op(EFA_SEND_BYTES, "send_bytes") \ + op(EFA_SEND_WRS, "send_wrs") \ + op(EFA_RECV_BYTES, "recv_bytes") \ + op(EFA_RECV_WRS, "recv_wrs") \ + op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \ + op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ + op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ + op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ op(EFA_COMPLETED_CMDS, "completed_cmds") \ op(EFA_CMDS_ERR, "cmds_err") \ @@ -142,10 +151,9 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry) return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); } -static inline bool is_rdma_read_cap(struct efa_dev *dev) -{ - return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; -} +#define EFA_DEV_CAP(dev, cap) \ + 
((dev)->dev_attr.device_caps & \ + EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK) #define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@ -221,9 +229,12 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; - if (is_rdma_read_cap(dev)) + if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; + if (EFA_DEV_CAP(dev, RNR_RETRY)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -269,7 +280,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, #define EFA_QUERY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ - IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP) + IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -291,6 +302,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->sq_psn = result.sq_psn; qp_attr->sq_draining = result.sq_draining; qp_attr->port_num = 1; + qp_attr->rnr_retry = result.rnr_retry; qp_attr->cap.max_send_wr = qp->max_send_wr; qp_attr->cap.max_recv_wr = qp->max_recv_wr; @@ -376,17 +388,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) err_dealloc_pd: efa_pd_dealloc(dev, result.pdn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_pd_err); + atomic64_inc(&dev->stats.alloc_pd_err); return err; } -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); struct efa_pd *pd = to_epd(ibpd); ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); efa_pd_dealloc(dev, pd->pdn); + return 0; } static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) @@ -737,18 +750,130 @@ err_free_mapped: err_free_qp: kfree(qp); err_out: - atomic64_inc(&dev->stats.sw_stats.create_qp_err); + atomic64_inc(&dev->stats.create_qp_err); return ERR_PTR(err); } +static const struct { + int valid; + enum ib_qp_attr_mask req_param; + enum ib_qp_attr_mask opt_param; +} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + [IB_QPS_RTR] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + }, + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = IB_QP_SQ_PSN, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY | + IB_QP_RNR_RETRY, + + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY, + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + 
.opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + } +}; + +static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state, + enum ib_qp_state next_state, + enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return false; + + if (!srd_qp_state_table[cur_state][next_state].valid) + return false; + + req_param = srd_qp_state_table[cur_state][next_state].req_param; + opt_param = srd_qp_state_table[cur_state][next_state].opt_param; + + if ((mask & req_param) != req_param) + return false; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return false; + + return true; +} + static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + int err; + #define EFA_MODIFY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ - IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) + IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \ + IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -757,8 +882,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, return -EOPNOTSUPP; } - if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, - qp_attr_mask)) { + if (qp->ibqp.qp_type == IB_QPT_DRIVER) + err = !efa_modify_srd_qp_is_ok(cur_state, new_state, + qp_attr_mask); + else + err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, + qp_attr_mask); + + if (err) { ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); return -EINVAL; } @@ -786,6 +917,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, enum ib_qp_state new_state; int err; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, @@ -805,28 +939,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, params.qp_handle = qp->qp_handle; if (qp_attr_mask & IB_QP_STATE) { - params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) | - BIT(EFA_ADMIN_CUR_QP_STATE_BIT); - params.cur_qp_state = qp_attr->cur_qp_state; - params.qp_state = qp_attr->qp_state; + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, + 1); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); + params.cur_qp_state = cur_state; + params.qp_state = new_state; } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { - params.modify_mask |= - BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1); params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; } if (qp_attr_mask & IB_QP_QKEY) { - params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1); params.qkey = qp_attr->qkey; } if (qp_attr_mask & IB_QP_SQ_PSN) { - params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1); params.sq_psn = qp_attr->sq_psn; } + if (qp_attr_mask & IB_QP_RNR_RETRY) { + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, + 1); + params.rnr_retry = qp_attr->rnr_retry; + } + err = efa_com_modify_qp(&dev->edev, ¶ms); if (err) return err; @@ -843,7 +985,7 @@ static int efa_destroy_cq_idx(struct 
efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, ¶ms); } -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); struct efa_cq *cq = to_ecq(ibcq); @@ -856,6 +998,7 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + return 0; } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, @@ -889,6 +1032,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + if (attr->flags) + return -EOPNOTSUPP; + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { ibdev_dbg(ibdev, "cq: requested entries[%u] non-positive or greater than max[%u]\n", @@ -996,7 +1142,7 @@ err_free_mapped: DMA_FROM_DEVICE); err_out: - atomic64_inc(&dev->stats.sw_stats.create_cq_err); + atomic64_inc(&dev->stats.create_cq_err); return err; } @@ -1013,8 +1159,7 @@ static int umem_to_page_list(struct efa_dev *dev, ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", hp_cnt, pages_in_hp); - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - BIT(hp_shift)) + rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift)) page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); return 0; @@ -1026,7 +1171,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) struct page *pg; int i; - sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL); + sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL); if (!sglist) return NULL; sg_init_table(sglist, page_cnt); @@ -1370,7 +1515,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, supp_access_flags = IB_ACCESS_LOCAL_WRITE | - (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + (EFA_DEV_CAP(dev, RDMA_READ) ? 
IB_ACCESS_REMOTE_READ : 0); access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~supp_access_flags) { @@ -1410,9 +1555,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_unmap; } - params.page_shift = __ffs(pg_sz); - params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)), - pg_sz); + params.page_shift = order_base_2(pg_sz); + params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz); ibdev_dbg(&dev->ibdev, "start %#llx length %#llx params.page_shift %u params.page_num %u\n", @@ -1451,7 +1595,7 @@ err_unmap: err_free: kfree(mr); err_out: - atomic64_inc(&dev->stats.sw_stats.reg_mr_err); + atomic64_inc(&dev->stats.reg_mr_err); return ERR_PTR(err); } @@ -1569,19 +1713,17 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.max_tx_batch = dev->dev_attr.max_tx_batch; resp.min_sq_wr = dev->dev_attr.min_sq_depth; - if (udata && udata->outlen) { - err = ib_copy_to_udata(udata, &resp, - min(sizeof(resp), udata->outlen)); - if (err) - goto err_dealloc_uar; - } + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) + goto err_dealloc_uar; return 0; err_dealloc_uar: efa_dealloc_uar(dev, result.uarn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err); + atomic64_inc(&dev->stats.alloc_ucontext_err); return err; } @@ -1614,7 +1756,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, ibdev_dbg(&dev->ibdev, "pgoff[%#lx] does not have valid entry\n", vma->vm_pgoff); - atomic64_inc(&dev->stats.sw_stats.mmap_err); + atomic64_inc(&dev->stats.mmap_err); return -EINVAL; } entry = to_emmap(rdma_entry); @@ -1656,7 +1798,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); - atomic64_inc(&dev->stats.sw_stats.mmap_err); + atomic64_inc(&dev->stats.mmap_err); } rdma_user_mmap_entry_put(rdma_entry); @@ -1741,11 +1883,11 @@ int efa_create_ah(struct ib_ah *ibah, err_destroy_ah: efa_ah_destroy(dev, ah); err_out: - atomic64_inc(&dev->stats.sw_stats.create_ah_err); + atomic64_inc(&dev->stats.create_ah_err); return err; } -void efa_destroy_ah(struct ib_ah *ibah, u32 flags) +int efa_destroy_ah(struct ib_ah *ibah, u32 flags) { struct efa_dev *dev = to_edev(ibah->pd->device); struct efa_ah *ah = to_eah(ibah); @@ -1755,10 +1897,11 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags) if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Destroy address handle is not supported in atomic context\n"); - return; + return -EOPNOTSUPP; } efa_ah_destroy(dev, ah); + return 0; } struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) @@ -1774,13 +1917,15 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, struct efa_com_get_stats_params params = {}; union efa_com_get_stats_result result; struct efa_dev *dev = to_edev(ibdev); + struct efa_com_rdma_read_stats *rrs; + struct efa_com_messages_stats *ms; struct efa_com_basic_stats *bs; struct efa_com_stats_admin *as; struct efa_stats *s; int err; - params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL; + params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; err = efa_com_get_stats(&dev->edev, ¶ms, &result); if (err) @@ -1793,6 +1938,28 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, stats->value[EFA_RX_PKTS] = bs->rx_pkts; stats->value[EFA_RX_DROPS] = 
+ params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES; + err = efa_com_get_stats(&dev->edev, &params, &result); + if (err) + return err; + + ms = &result.messages_stats; + stats->value[EFA_SEND_BYTES] = ms->send_bytes; + stats->value[EFA_SEND_WRS] = ms->send_wrs; + stats->value[EFA_RECV_BYTES] = ms->recv_bytes; + stats->value[EFA_RECV_WRS] = ms->recv_wrs; + + params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ; + err = efa_com_get_stats(&dev->edev, &params, &result); + if (err) + return err; + + rrs = &result.rdma_read_stats; + stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs; + stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes; + stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; + stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; + as = &dev->edev.aq.stats; stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); @@ -1801,13 +1968,14 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, s = &dev->stats; stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); - stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); - stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); - stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err); - stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); - stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); - stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); - stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err); + stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err); + stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err); + stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err); + stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err); + stats->value[EFA_ALLOC_UCONTEXT_ERR] = + atomic64_read(&s->alloc_ucontext_err); + stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); + stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); return ARRAY_SIZE(efa_stats_names); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7eaf99538216..c87b94ea2939 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -15245,7 +15245,8 @@ int hfi1_init_dd(struct hfi1_devdata *dd) & CCE_REVISION_SW_MASK); /* alloc netdev data */ - if (hfi1_netdev_alloc(dd)) + ret = hfi1_netdev_alloc(dd); + if (ret) goto bail_cleanup; ret = set_up_context_variables(dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a40701a6e1b6..0b64aa87ab73 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1686,7 +1686,6 @@ static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) u32 extra_bytes; u32 tlen, qpnum; bool do_work, do_cnp; - struct hfi1_ipoib_dev_priv *priv; trace_hfi1_rcvhdr(packet); @@ -1734,8 +1733,7 @@ static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) if (unlikely(!skb)) goto drop; - priv = hfi1_ipoib_priv(netdev); - hfi1_ipoib_update_rx_netstats(priv, 1, skb->len); + dev_sw_netstats_rx_add(netdev, skb->len); skb->dev = netdev; skb->pkt_type = PACKET_HOST; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 8ca51e43cf53..329ee4f48d95 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,4 
+1,5 @@ /* + * Copyright(c) 2020 Cornelis Networks, Inc. * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or @@ -206,8 +207,6 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); fd->dd = dd; fp->private_data = fd; return 0; @@ -711,7 +710,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) deallocate_ctxt(uctxt); done: - mmdrop(fdata->mm); if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b4c6bff60a4e..e09e8244a94c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,6 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* + * Copyright(c) 2020 Cornelis Networks, Inc. * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or @@ -1451,7 +1452,6 @@ struct hfi1_filedata { u32 invalid_tid_idx; /* protect invalid_tids array and invalid_tid_idx */ spinlock_t invalid_lock; - struct mm_struct *mm; }; extern struct xarray hfi1_dev_table; diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index b8c9d0a003fb..f650cac9d424 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -110,7 +110,6 @@ struct hfi1_ipoib_dev_priv { const struct net_device_ops *netdev_ops; struct rvt_qp *qp; - struct pcpu_sw_netstats __percpu *netstats; }; /* hfi1 ipoib rdma netdev's private data structure */ @@ -126,32 +125,6 @@ hfi1_ipoib_priv(const struct net_device *dev) return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv; } -static inline void -hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv, - u64 packets, - u64 bytes) -{ - struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); - - u64_stats_update_begin(&netstats->syncp); - netstats->rx_packets += packets; - netstats->rx_bytes += bytes; - u64_stats_update_end(&netstats->syncp); -} - -static inline void -hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv, - u64 packets, - u64 bytes) -{ - struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); - - u64_stats_update_begin(&netstats->syncp); - netstats->tx_packets += packets; - netstats->tx_bytes += bytes; - u64_stats_update_end(&netstats->syncp); -} - int hfi1_ipoib_send_dma(struct net_device *dev, struct sk_buff *skb, struct ib_ah *address, diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index 014351ebbefa..3242290eb6a7 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -21,7 +21,7 @@ static int hfi1_ipoib_dev_init(struct net_device *dev) struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); int ret; - priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); ret = priv->netdev_ops->ndo_init(dev); if (ret) @@ -93,53 +93,12 @@ static int hfi1_ipoib_dev_stop(struct net_device *dev) return priv->netdev_ops->ndo_stop(dev); } -static void hfi1_ipoib_dev_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *storage) -{ - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); - u64 rx_packets = 0ull; - u64 rx_bytes = 0ull; - u64 tx_packets = 0ull; - u64 
tx_bytes = 0ull; - int i; - - netdev_stats_to_stats64(storage, &dev->stats); - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *stats; - unsigned int start; - u64 trx_packets; - u64 trx_bytes; - u64 ttx_packets; - u64 ttx_bytes; - - stats = per_cpu_ptr(priv->netstats, i); - do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - trx_packets = stats->rx_packets; - trx_bytes = stats->rx_bytes; - ttx_packets = stats->tx_packets; - ttx_bytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); - - rx_packets += trx_packets; - rx_bytes += trx_bytes; - tx_packets += ttx_packets; - tx_bytes += ttx_bytes; - } - - storage->rx_packets += rx_packets; - storage->rx_bytes += rx_bytes; - storage->tx_packets += tx_packets; - storage->tx_bytes += tx_bytes; -} - static const struct net_device_ops hfi1_ipoib_netdev_ops = { .ndo_init = hfi1_ipoib_dev_init, .ndo_uninit = hfi1_ipoib_dev_uninit, .ndo_open = hfi1_ipoib_dev_open, .ndo_stop = hfi1_ipoib_dev_stop, - .ndo_get_stats64 = hfi1_ipoib_dev_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, }; static int hfi1_ipoib_send(struct net_device *dev, @@ -214,7 +173,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) hfi1_ipoib_txreq_deinit(priv); hfi1_ipoib_rxq_deinit(priv->netdev); - free_percpu(priv->netstats); + free_percpu(dev->tstats); } static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 9df292b51a05..edd4eeac8dd1 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -121,7 +121,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) struct hfi1_ipoib_dev_priv *priv = tx->priv; if (likely(!tx->sdma_status)) { - hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len); + dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len); } else { ++priv->netdev->stats.tx_errors; dd_dev_warn(priv->dd, diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 24ca17b77b72..f3fb28e3d5d7 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -1,4 +1,5 @@ /* + * Copyright(c) 2020 Cornelis Networks, Inc. * Copyright(c) 2016 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or @@ -48,23 +49,11 @@ #include <linux/rculist.h> #include <linux/mmu_notifier.h> #include <linux/interval_tree_generic.h> +#include <linux/sched/mm.h> #include "mmu_rb.h" #include "trace.h" -struct mmu_rb_handler { - struct mmu_notifier mn; - struct rb_root_cached root; - void *ops_arg; - spinlock_t lock; /* protect the RB tree */ - struct mmu_rb_ops *ops; - struct mm_struct *mm; - struct list_head lru_list; - struct work_struct del_work; - struct list_head del_list; - struct workqueue_struct *wq; -}; - static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); static int mmu_notifier_range_start(struct mmu_notifier *, @@ -92,37 +81,36 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) return PAGE_ALIGN(node->addr + node->len) - 1; } -int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, +int hfi1_mmu_rb_register(void *ops_arg, struct mmu_rb_ops *ops, struct workqueue_struct *wq, struct mmu_rb_handler **handler) { - struct mmu_rb_handler *handlr; + struct mmu_rb_handler *h; int ret; - handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); - if (!handlr) + h = kmalloc(sizeof(*h), GFP_KERNEL); + if (!h) return -ENOMEM; - handlr->root = RB_ROOT_CACHED; - handlr->ops = ops; - handlr->ops_arg = ops_arg; - INIT_HLIST_NODE(&handlr->mn.hlist); - spin_lock_init(&handlr->lock); - handlr->mn.ops = &mn_opts; - handlr->mm = mm; - INIT_WORK(&handlr->del_work, handle_remove); - INIT_LIST_HEAD(&handlr->del_list); - INIT_LIST_HEAD(&handlr->lru_list); - handlr->wq = wq; - - ret = mmu_notifier_register(&handlr->mn, handlr->mm); + h->root = RB_ROOT_CACHED; + h->ops = ops; + h->ops_arg = ops_arg; + INIT_HLIST_NODE(&h->mn.hlist); + spin_lock_init(&h->lock); + h->mn.ops = &mn_opts; + INIT_WORK(&h->del_work, handle_remove); + INIT_LIST_HEAD(&h->del_list); + INIT_LIST_HEAD(&h->lru_list); + h->wq = wq; + + ret = mmu_notifier_register(&h->mn, current->mm); if (ret) { - kfree(handlr); + kfree(h); return ret; } - *handler = handlr; + *handler = h; return 0; } @@ -134,7 +122,7 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) struct list_head del_list; /* Unregister first so we don't get any more notifications. */ - mmu_notifier_unregister(&handler->mn, handler->mm); + mmu_notifier_unregister(&handler->mn, handler->mn.mm); /* * Make sure the wq delete handler is finished running. 
It will not @@ -166,6 +154,10 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, int ret = 0; trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len); + + if (current->mm != handler->mn.mm) + return -EPERM; + spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, mnode->addr, mnode->len); if (node) { @@ -180,6 +172,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, __mmu_int_rb_remove(mnode, &handler->root); list_del(&mnode->list); /* remove from LRU list */ } + mnode->handler = handler; unlock: spin_unlock_irqrestore(&handler->lock, flags); return ret; @@ -217,6 +210,9 @@ bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, unsigned long flags; bool ret = false; + if (current->mm != handler->mn.mm) + return ret; + spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); if (node) { @@ -239,6 +235,9 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) unsigned long flags; bool stop = false; + if (current->mm != handler->mn.mm) + return; + INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); @@ -272,6 +271,9 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, { unsigned long flags; + if (current->mm != handler->mn.mm) + return; + /* Validity of handler and node pointers has been checked by caller. */ trace_hfi1_mmu_rb_remove(node->addr, node->len); spin_lock_irqsave(&handler->lock, flags); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index f04cec1e99d1..423aacc67e94 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -1,4 +1,5 @@ /* + * Copyright(c) 2020 Cornelis Networks, Inc. * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or @@ -54,6 +55,7 @@ struct mmu_rb_node { unsigned long len; unsigned long __last; struct rb_node node; + struct mmu_rb_handler *handler; struct list_head list; }; @@ -71,7 +73,19 @@ struct mmu_rb_ops { void *evict_arg, bool *stop); }; -int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, +struct mmu_rb_handler { + struct mmu_notifier mn; + struct rb_root_cached root; + void *ops_arg; + spinlock_t lock; /* protect the RB tree */ + struct mmu_rb_ops *ops; + struct list_head lru_list; + struct work_struct del_work; + struct list_head del_list; + struct workqueue_struct *wq; +}; + +int hfi1_mmu_rb_register(void *ops_arg, struct mmu_rb_ops *ops, struct workqueue_struct *wq, struct mmu_rb_handler **handler); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 356518e17fa6..681bb4e918c9 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -339,6 +339,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; + break; default: break; } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 04575c9afd61..a307d4c8b15a 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -232,11 +232,11 @@ static const struct sdma_set_state_action sdma_action_table[] = { static void sdma_complete(struct kref *); static void sdma_finalput(struct sdma_state *); static void sdma_get(struct sdma_state *); -static void sdma_hw_clean_up_task(unsigned long); +static void sdma_hw_clean_up_task(struct tasklet_struct *); static void sdma_put(struct sdma_state *); static void sdma_set_state(struct sdma_engine *, enum sdma_states); static void sdma_start_hw_clean_up(struct sdma_engine *); -static void sdma_sw_clean_up_task(unsigned long); +static void sdma_sw_clean_up_task(struct tasklet_struct *); static void sdma_sendctrl(struct sdma_engine *, unsigned); static void init_sdma_regs(struct sdma_engine *, u32, uint); static void sdma_process_event( @@ -545,9 +545,10 @@ static void sdma_err_progress_check(struct timer_list *t) schedule_work(&sde->err_halt_worker); } -static void sdma_hw_clean_up_task(unsigned long opaque) +static void sdma_hw_clean_up_task(struct tasklet_struct *t) { - struct sdma_engine *sde = (struct sdma_engine *)opaque; + struct sdma_engine *sde = from_tasklet(sde, t, + sdma_hw_clean_up_task); u64 statuscsr; while (1) { @@ -604,9 +605,9 @@ static void sdma_flush_descq(struct sdma_engine *sde) sdma_desc_avail(sde, sdma_descq_freecnt(sde)); } -static void sdma_sw_clean_up_task(unsigned long opaque) +static void sdma_sw_clean_up_task(struct tasklet_struct *t) { - struct sdma_engine *sde = (struct sdma_engine *)opaque; + struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task); unsigned long flags; spin_lock_irqsave(&sde->tail_lock, flags); @@ -1454,11 +1455,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->tail_csr = get_kctxt_csr_addr(dd, this_idx, SD(TAIL)); - tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task, - (unsigned long)sde); - - tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)sde); + tasklet_setup(&sde->sdma_hw_clean_up_task, + sdma_hw_clean_up_task); + tasklet_setup(&sde->sdma_sw_clean_up_task, + sdma_sw_clean_up_task); INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait); INIT_WORK(&sde->flush_worker, sdma_field_flush); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c 
b/drivers/infiniband/hw/hfi1/sysfs.c index 074ec71772d2..5650130e68d4 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -151,7 +151,7 @@ struct hfi1_port_attr { static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) { - return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); + return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); } static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, @@ -296,7 +296,7 @@ static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sc2vl_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); + return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); } static const struct sysfs_ops hfi1_sc2vl_ops = { @@ -401,7 +401,7 @@ static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sl2sc_kobj); struct hfi1_ibport *ibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); + return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); } static const struct sysfs_ops hfi1_sl2sc_ops = { @@ -475,7 +475,7 @@ static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu); + return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu); } static const struct sysfs_ops hfi1_vl2mtu_ops = { @@ -500,7 +500,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -510,13 +510,11 @@ static ssize_t board_id_show(struct device *device, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(board_id); @@ -528,7 +526,7 @@ static ssize_t boardversion_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -545,9 +543,9 @@ static ssize_t nctxts_show(struct device *device, * and a receive context, so returning the smaller of the two counts * give a more accurate picture of total contexts available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_user_contexts, - (u32)dd->sc_sizes[SC_USER].count)); + return sysfs_emit(buf, "%u\n", + min(dd->num_user_contexts, + (u32)dd->sc_sizes[SC_USER].count)); } static DEVICE_ATTR_RO(nctxts); @@ -559,7 +557,7 @@ static ssize_t nfreectxts_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); @@ -570,7 +568,8 @@ static ssize_t serial_show(struct device *device, rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); + /* dd->serial is already newline terminated in chip.c */ + return sysfs_emit(buf, "%s", dd->serial); } static DEVICE_ATTR_RO(serial); @@ -598,9 +597,8 @@ static DEVICE_ATTR_WO(chip_reset); * Convert the reported temperature from an integer (reported in * units of 0.25C) to a floating point number. */ -#define temp2str(temp, buf, size, idx) \ - scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \ - ((temp) >> 2), ((temp) & 0x3) * 25) +#define temp_d(t) ((t) >> 2) +#define temp_f(t) (((t)&0x3) * 25u) /* * Dump tempsense values, in decimal, to ease shell-scripts. @@ -615,19 +613,17 @@ static ssize_t tempsense_show(struct device *device, int ret; ret = hfi1_tempsense_rd(dd, &temp); - if (!ret) { - int idx = 0; - - idx += temp2str(temp.curr, buf, PAGE_SIZE, idx); - idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx); - idx += scnprintf(buf + idx, PAGE_SIZE - idx, - "%u %u %u\n", temp.triggers & 0x1, - temp.triggers & 0x2, temp.triggers & 0x4); - ret = idx; - } - return ret; + if (ret) + return ret; + + return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n", + temp_d(temp.curr), temp_f(temp.curr), + temp_d(temp.lo_lim), temp_f(temp.lo_lim), + temp_d(temp.hi_lim), temp_f(temp.hi_lim), + temp_d(temp.crit_lim), temp_f(temp.crit_lim), + temp.triggers & 0x1, + temp.triggers & 0x2, + temp.triggers & 0x4); } static DEVICE_ATTR_RO(tempsense); @@ -817,7 +813,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) if (vl < 0) return vl; - return snprintf(buf, PAGE_SIZE, "%d\n", vl); + return sysfs_emit(buf, "%d\n", vl); } static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 73d197e21730..92aa2a9b3b5a 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2826,6 +2826,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3005,6 +3006,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3221,6 +3223,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } @@ -3239,9 +3242,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } + break; default: break; } diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index f81ca20f4b69..b94fc7fd75a9 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,4 +1,5 @@ /* + * Copyright(c) 2020 Cornelis Networks, Inc. * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or @@ -173,15 +174,18 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, { struct page **pages; struct hfi1_devdata *dd = fd->uctxt->dd; + struct mm_struct *mm; if (mapped) { pci_unmap_single(dd->pcidev, node->dma_addr, node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); pages = &node->pages[idx]; + mm = mm_from_tid_node(node); } else { pages = &tidbuf->pages[idx]; + mm = current->mm; } - hfi1_release_user_pages(fd->mm, pages, npages, mapped); + hfi1_release_user_pages(mm, pages, npages, mapped); fd->tid_n_pinned -= npages; } @@ -216,12 +220,12 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) * pages, accept the amount pinned so far and program only that. * User space knows how to deal with partially programmed buffers. */ - if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { + if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) { kfree(pages); return -ENOMEM; } - pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); + pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages); if (pinned <= 0) { kfree(pages); return pinned; @@ -756,7 +760,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, if (fd->use_mn) { ret = mmu_interval_notifier_insert( - &node->notifier, fd->mm, + &node->notifier, current->mm, tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, &tid_mn_ops); if (ret) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 332abb446861..d45c7b6988d4 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -1,6 +1,7 @@ #ifndef _HFI1_USER_EXP_RCV_H #define _HFI1_USER_EXP_RCV_H /* + * Copyright(c) 2020 - Cornelis Networks, Inc. * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or @@ -95,4 +96,9 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo); +static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node) +{ + return node->notifier.mm; +} + #endif /* _HFI1_USER_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a92346e88628..4a4956f96a7e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1,4 +1,5 @@ /* + * Copyright(c) 2020 - Cornelis Networks, Inc. * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or @@ -188,7 +189,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); atomic_set(&pq->n_locked, 0); - pq->mm = fd->mm; iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue, activate_packet_queue, NULL, NULL); @@ -230,7 +230,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, cq->nentries = hfi1_sdma_comp_ring_size; - ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, + ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq, &pq->handler); if (ret) { dd_dev_err(dd, "Failed to register with MMU %d", ret); @@ -980,13 +980,13 @@ static int pin_sdma_pages(struct user_sdma_request *req, npages -= node->npages; retry: - if (!hfi1_can_pin_pages(pq->dd, pq->mm, + if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked), npages)) { cleared = sdma_cache_evict(pq, npages); if (cleared >= npages) goto retry; } - pinned = hfi1_acquire_user_pages(pq->mm, + pinned = hfi1_acquire_user_pages(current->mm, ((unsigned long)iovec->iov.iov_base + (node->npages * PAGE_SIZE)), npages, 0, pages + node->npages); @@ -995,7 +995,7 @@ retry: return pinned; } if (pinned != npages) { - unpin_vector_pages(pq->mm, pages, node->npages, pinned); + unpin_vector_pages(current->mm, pages, node->npages, pinned); return -EFAULT; } kfree(node->pages); @@ -1008,7 +1008,8 @@ retry: static void unpin_sdma_pages(struct sdma_mmu_node *node) { if (node->npages) { - unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); + unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, + node->npages); atomic_sub(node->npages, &node->pq->n_locked); } } diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 9972e0e6545e..1e8c02fe8ad1 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -1,6 +1,7 @@ #ifndef _HFI1_USER_SDMA_H #define _HFI1_USER_SDMA_H /* + * Copyright(c) 2020 - Cornelis Networks, Inc. * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or @@ -133,7 +134,6 @@ struct hfi1_user_sdma_pkt_q { unsigned long unpinned; struct mmu_rb_handler *handler; atomic_t n_locked; - struct mm_struct *mm; }; struct hfi1_user_sdma_comp_q { @@ -250,4 +250,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count); +static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node) +{ + return node->rb.handler->mn.mm; +} + #endif /* _HFI1_USER_SDMA_H */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 30865635b449..3591923abebb 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1424,7 +1424,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ - props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); + props->active_speed = opa_speed_to_ib(ppd->link_speed_active); props->max_vl_num = ppd->vls_supported; /* Once we are a "first class" citizen and have added the OPA MTUs to diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 5b2f9314edd3..cc258edec331 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -31,56 +31,65 @@ */ #include <linux/platform_device.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include "hns_roce_device.h" -#define HNS_ROCE_PORT_NUM_SHIFT 24 -#define HNS_ROCE_VLAN_SL_BIT_MASK 7 -#define HNS_ROCE_VLAN_SL_SHIFT 13 +static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) +{ + u32 fl = ah_attr->grh.flow_label; + u16 sport; + + if (!fl) + sport = get_random_u32() % + (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 - + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) + + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN; + else + sport = rdma_flow_label_to_udp_sport(fl); + + return sport; +} int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); - const struct ib_gid_attr *gid_attr; - struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah = to_hr_ah(ibah); struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - u16 vlan_id = 0xffff; - bool vlan_en = false; - int ret; - - gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); - if (ret) - return ret; - - /* Get mac address */ - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ah *ah = to_hr_ah(ibah); + int ret = 0; - if (vlan_id < VLAN_N_VID) { - vlan_en = true; - vlan_id |= (rdma_ah_get_sl(ah_attr) & - HNS_ROCE_VLAN_SL_BIT_MASK) << - HNS_ROCE_VLAN_SL_SHIFT; - } + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) + return -EOPNOTSUPP; ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan_id = vlan_id; - ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, - ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + ah->av.hop_limit = grh->hop_limit; + ah->av.flowlabel = grh->flow_label; + ah->av.udp_sport = get_ah_udp_sport(ah_attr); ah->av.sl = rdma_ah_get_sl(ah_attr); 
+ ah->av.tclass = get_tclass(grh); - return 0; + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + + /* HIP08 needs to record vlan info in Address Vector */ + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, + &ah->av.vlan_id, NULL); + if (ret) + return ret; + + ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; + } + + return ret; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -98,8 +107,3 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a522cb2d29ea..4bcaaa0524b1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - struct device *dev = hr_dev->dev; - u32 size = buf->size; - int i; + struct hns_roce_buf_list *trunks; + u32 i; - if (size == 0) + if (!buf) return; - buf->size = 0; + trunks = buf->trunk_list; + if (trunks) { + buf->trunk_list = NULL; + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, + trunks[i].buf, trunks[i].map); - if (hns_roce_buf_is_direct(buf)) { - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); - } else { - for (i = 0; i < buf->npages; ++i) - if (buf->page_list[i].buf) - dma_free_coherent(dev, 1 << buf->page_shift, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - buf->page_list = NULL; + kfree(trunks); } + + kfree(buf); } -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift) +/* + * Allocate the dma buffer for storing ROCEE table entries + * + * @size: required size + * @page_shift: the unit size in a continuous dma address range + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. + */ +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags) { - struct hns_roce_buf_list *buf_list; - struct device *dev = hr_dev->dev; - u32 page_size; - int i; + u32 trunk_size, page_size, alloced_size; + struct hns_roce_buf_list *trunks; + struct hns_roce_buf *buf; + gfp_t gfp_flags; + u32 ntrunk, i; /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ - buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) + return ERR_PTR(-EINVAL); + + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? 
GFP_ATOMIC : GFP_KERNEL; + buf = kzalloc(sizeof(*buf), gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->page_shift = page_shift; page_size = 1 << buf->page_shift; - buf->npages = DIV_ROUND_UP(size, page_size); - - /* required size is not bigger than one trunk size */ - if (size <= max_direct) { - buf->page_list = NULL; - buf->direct.buf = dma_alloc_coherent(dev, size, - &buf->direct.map, - GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; + + /* Calc the trunk size and num by required size and page_shift */ + if (flags & HNS_ROCE_BUF_DIRECT) { + buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); + ntrunk = 1; } else { - buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); - if (!buf_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; i++) { - buf_list[i].buf = dma_alloc_coherent(dev, page_size, - &buf_list[i].map, - GFP_KERNEL); - if (!buf_list[i].buf) - break; - } + buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); + } - if (i != buf->npages && i > 0) { - while (i-- > 0) - dma_free_coherent(dev, page_size, - buf_list[i].buf, - buf_list[i].map); - kfree(buf_list); - return -ENOMEM; - } - buf->page_list = buf_list; + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); + if (!trunks) { + kfree(buf); + return ERR_PTR(-ENOMEM); } - buf->size = size; - return 0; + trunk_size = 1 << buf->trunk_shift; + alloced_size = 0; + for (i = 0; i < ntrunk; i++) { + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, + &trunks[i].map, gfp_flags); + if (!trunks[i].buf) + break; + + alloced_size += trunk_size; + } + + buf->ntrunks = i; + + /* In nofail mode, it's only failed when the alloced size is 0 */ + if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) { + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, trunk_size, + trunks[i].buf, trunks[i].map); + + kfree(trunks); + kfree(buf); + return ERR_PTR(-ENOMEM); + } + + buf->npages = DIV_ROUND_UP(alloced_size, page_size); + buf->trunk_list = trunks; + + return buf; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -240,7 +260,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "Failed to check kmem bufs, end %d + %d total %d!\n", + "failed to check kmem bufs, end %d + %d total %u!\n", start, buf_cnt, buf->npages); return -EINVAL; } @@ -262,14 +282,13 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, u64 addr; if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", page_shift); return -EINVAL; } /* convert system page cnt to hw page cnt */ - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - 1 << page_shift) { + rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) { addr = rdma_block_iter_dma_address(&biter); if (idx >= start) { bufs[total++] = addr; diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 455d533dd7c4..339e3fd98b0b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -36,9 +36,9 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" -#define CMD_POLL_TOKEN 0xffff -#define CMD_MAX_NUM 32 -#define CMD_TOKEN_MASK 0x1f +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define CMD_TOKEN_MASK 0x1f static int hns_roce_cmd_mbox_post_hw(struct 
hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, @@ -60,7 +60,7 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; int ret; @@ -78,7 +78,7 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; @@ -93,8 +93,8 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, u64 out_param) { - struct hns_roce_cmd_context - *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + struct hns_roce_cmd_context *context = + &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds]; if (token != context->token) return; @@ -108,7 +108,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; @@ -159,13 +159,13 @@ out: static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); up(&hr_dev->cmd.event_sem); return ret; @@ -173,7 +173,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { int ret; @@ -231,9 +231,8 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; int i; - hr_cmd->context = kmalloc_array(hr_cmd->max_cmds, - sizeof(*hr_cmd->context), - GFP_KERNEL); + hr_cmd->context = + kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); if (!hr_cmd->context) return -ENOMEM; @@ -262,8 +261,8 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) hr_cmd->use_events = 0; } -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) { struct hns_roce_cmd_mailbox *mailbox; @@ -271,8 +270,8 @@ struct hns_roce_cmd_mailbox if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, - &mailbox->dma); + mailbox->buf = + dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 1915bacaded0..8025e7f657fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ 
-141,10 +141,10 @@ enum { int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout); + unsigned int timeout); -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f5669ff8cfeb..5afee04fb02c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -38,21 +38,33 @@ #define roce_raw_write(value, addr) \ __raw_writel((__force u32)cpu_to_le32(value), (addr)) -#define roce_get_field(origin, mask, shift) \ - (((le32_to_cpu(origin)) & (mask)) >> (shift)) +#define roce_get_field(origin, mask, shift) \ + ((le32_to_cpu(origin) & (mask)) >> (u32)(shift)) #define roce_get_bit(origin, shift) \ roce_get_field((origin), (1ul << (shift)), (shift)) -#define roce_set_field(origin, mask, shift, val) \ - do { \ - (origin) &= ~cpu_to_le32(mask); \ - (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \ +#define roce_set_field(origin, mask, shift, val) \ + do { \ + (origin) &= ~cpu_to_le32(mask); \ + (origin) |= cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \ } while (0) -#define roce_set_bit(origin, shift, val) \ +#define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l + +#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) |= \ + cpu_to_le32(BIT((field_l) % 32)) + \ + BUILD_BUG_ON_ZERO((field_h) != (field_l)); \ + }) + +#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e87d616f7988..8533fc2d8df2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -36,43 +36,42 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include <rdma/hns-abi.h> #include "hns_roce_common.h" static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_cq_table *cq_table; - struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), &dma_handle); - if (ret < 1) { - ibdev_err(ibdev, "Failed to find CQ mtr\n"); + if (!ret) { + ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); return -EINVAL; } cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); goto err_out; } 
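The hns_roce_common.h hunk above tightens the roce_get_field()/roce_set_field() mask-and-shift accessors and introduces hr_reg_enable() for single-bit fields. A standalone sketch of how such accessors behave; the field names are invented for illustration, and the kernel macros additionally wrap the stored value in cpu_to_le32()/le32_to_cpu(), which is omitted here:

#include <stdio.h>
#include <stdint.h>

#define GET_FIELD(origin, mask, shift) \
	(((origin) & (mask)) >> (uint32_t)(shift))

#define SET_FIELD(origin, mask, shift, val)				\
	do {								\
		(origin) &= ~(mask);					\
		(origin) |= (((uint32_t)(val) << (uint32_t)(shift)) & (mask)); \
	} while (0)

/* Example field layout: bits 7..4 of a 32-bit context word. */
#define CTX_VECTOR_M	0x000000F0u
#define CTX_VECTOR_S	4

int main(void)
{
	uint32_t ctx = 0;

	SET_FIELD(ctx, CTX_VECTOR_M, CTX_VECTOR_S, 0xA);
	printf("ctx=%#010x vector=%u\n", ctx,
	       (unsigned)GET_FIELD(ctx, CTX_VECTOR_M, CTX_VECTOR_S));
	return 0;
}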
ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to xa_store CQ\n"); + ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); goto err_put; } @@ -91,7 +90,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, - "Failed to send create cmd for CQ(0x%lx), err %d\n", + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", hr_cq->cqn, ret); goto err_xa; } @@ -147,21 +146,21 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + if (ret) + ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); - return err; + return ret; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) @@ -224,6 +223,21 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, } } +static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, + struct hns_roce_ib_create_cq *ucmd) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + + if (udata) { + if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) + hr_cq->cqe_size = ucmd->cqe_size; + else + hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; + } else { + hr_cq->cqe_size = hr_dev->caps.cqe_sz; + } +} + int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { @@ -236,14 +250,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, u32 cq_entries = attr->cqe; int ret; + if (attr->flags) + return -EOPNOTSUPP; + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n", vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -258,36 +275,40 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret); return ret; } } + set_cqe_size(hr_cq, udata, &ucmd); + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); return ret; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", 
ret); + ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret); goto err_cq_buf; } ret = alloc_cqc(hr_dev, hr_cq); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc CQ context, ret = %d.\n", ret); goto err_cq_db; } /* * For the QP created by kernel space, tptr value should be initialized * to zero; For the QP created by user space, it will cause synchronous - * problems if tptr is set to zero here, so we initialze it in user + * problems if tptr is set to zero here, so we initialize it in user * space. */ if (!udata && hr_cq->tptr_addr) @@ -295,7 +316,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { resp.cqn = hr_cq->cqn; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto err_cqc; } @@ -311,7 +333,7 @@ err_cq_buf: return ret; } -void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -322,6 +344,7 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) free_cq_buf(hr_dev, hr_cq); free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + return 0; } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index bff6abdccfb0..5cb7376ce978 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -95,8 +95,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, struct hns_roce_db *db, int order) { - int o; - int i; + unsigned long o; + unsigned long i; for (o = order; o <= 1; ++o) { i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o); @@ -154,8 +154,8 @@ out: void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) { - int o; - int i; + unsigned long o; + unsigned long i; mutex_lock(&hr_dev->pgdir_mutex); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6edcbdcd8f43..ad8253245a85 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,11 +34,12 @@ #define _HNS_ROCE_DEVICE_H #include <rdma/ib_verbs.h> +#include <rdma/hns-abi.h> #define DRV_NAME "hns_roce" -/* hip08 is a pci device */ #define PCI_REVISION_ID_HIP08 0x21 +#define PCI_REVISION_ID_HIP09 0x30 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') @@ -57,7 +58,6 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 #define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 -#define HNS_ROCE_MAX_SGE_NUM 2 #define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 #define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ @@ -76,15 +76,18 @@ #define HNS_ROCE_CEQ 0 #define HNS_ROCE_AEQ 1 -#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4 -#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10 +#define HNS_ROCE_CEQE_SIZE 0x4 +#define HNS_ROCE_AEQE_SIZE 0x10 -#define HNS_ROCE_SL_SHIFT 28 -#define HNS_ROCE_TCLASS_SHIFT 20 -#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff +#define HNS_ROCE_V3_EQE_SIZE 0x40 + +#define HNS_ROCE_V2_CQE_SIZE 32 +#define HNS_ROCE_V3_CQE_SIZE 64 + +#define HNS_ROCE_V2_QPC_SZ 256 +#define HNS_ROCE_V3_QPC_SZ 512 #define HNS_ROCE_MAX_PORTS 6 -#define HNS_ROCE_MAX_GID_NUM 16 
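The set_cqe_size() helper added above uses the usual offsetofend()-against-udata->inlen test to decide whether userspace is new enough to have passed the cqe_size field, and the copy paths clamp with min(udata->inlen, sizeof(ucmd)) so older and newer providers interoperate. A userspace sketch of that ABI-extension idiom; the struct layout and default below are illustrative, not the driver ABI:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define offsetofend(type, member) \
	(offsetof(type, member) + sizeof(((type *)0)->member))

struct create_cq_cmd {
	uint64_t buf_addr;
	uint64_t db_addr;
	uint32_t cqe_size;	/* field added in a later ABI revision */
	uint32_t rsvd;
};

#define DEFAULT_CQE_SIZE 32u

static uint32_t resolve_cqe_size(const struct create_cq_cmd *cmd, size_t inlen)
{
	/* inlen is how many bytes userspace actually provided */
	if (inlen >= offsetofend(struct create_cq_cmd, cqe_size))
		return cmd->cqe_size;	/* new userspace: honour its request */
	return DEFAULT_CQE_SIZE;	/* old userspace: fall back to default */
}

int main(void)
{
	struct create_cq_cmd cmd = { .cqe_size = 64 };

	printf("old inlen=16 -> cqe_size %u\n",
	       (unsigned)resolve_cqe_size(&cmd, 16));
	printf("new inlen=%zu -> cqe_size %u\n",
	       sizeof(cmd), (unsigned)resolve_cqe_size(&cmd, sizeof(cmd)));
	return 0;
}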
#define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 @@ -112,11 +115,11 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 -#define HNS_ROCE_PCI_BAR_NUM 2 - #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 +#define HNS_ROCE_QP_BANK_NUM 8 + /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth */ @@ -129,15 +132,6 @@ enum { SERV_TYPE_UD, }; -enum { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), -}; - -enum hns_roce_cq_flags { - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), -}; - enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, @@ -166,7 +160,6 @@ enum hns_roce_event { /* 0x10 and 0x11 is unused in currently application case */ HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, HNS_ROCE_EVENT_TYPE_MB = 0x13, - HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; @@ -221,6 +214,8 @@ enum { HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), + HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), + HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; #define HNS_ROCE_DB_TYPE_COUNT 2 @@ -265,9 +260,6 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) -/* The minimum page count for hardware access page directly. */ -#define HNS_HW_DIRECT_PAGE_COUNT 2 - struct hns_roce_uar { u64 pfn; unsigned long index; @@ -318,7 +310,7 @@ struct hns_roce_hem_table { }; struct hns_roce_buf_region { - int offset; /* page offset */ + u32 offset; /* page offset */ u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -338,10 +330,10 @@ struct hns_roce_buf_attr { size_t size; /* region size */ int hopnum; /* multi-hop addressing hop num */ } region[HNS_ROCE_MAX_BT_REGION]; - int region_count; /* valid region count */ + unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ bool fixed_page; /* decide page shift is fixed-size or maximum size */ - int user_access; /* umem access flag */ + unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -352,7 +344,7 @@ struct hns_roce_hem_cfg { unsigned int buf_pg_shift; /* buffer page shift */ unsigned int buf_pg_count; /* buffer page count */ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - int region_count; + unsigned int region_count; }; /* memory translate region */ @@ -400,7 +392,7 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; u32 wqe_cnt; /* WQE num */ - int max_gs; + u32 max_gs; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -419,11 +411,26 @@ struct hns_roce_buf_list { dma_addr_t map; }; +/* + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous + * dma address range. + * + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. + * + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even + * the allocated size is smaller than the required size. 
+ */ +enum { + HNS_ROCE_BUF_DIRECT = BIT(0), + HNS_ROCE_BUF_NOSLEEP = BIT(1), + HNS_ROCE_BUF_NOFAIL = BIT(2), +}; + struct hns_roce_buf { - struct hns_roce_buf_list direct; - struct hns_roce_buf_list *page_list; + struct hns_roce_buf_list *trunk_list; + u32 ntrunks; u32 npages; - u32 size; + unsigned int trunk_shift; unsigned int page_shift; }; @@ -451,8 +458,8 @@ struct hns_roce_db { } u; dma_addr_t dma; void *virt_addr; - int index; - int order; + unsigned long index; + unsigned long order; }; struct hns_roce_cq { @@ -467,6 +474,7 @@ struct hns_roce_cq { void __iomem *cq_db_l; u16 *tptr_addr; int arm_sn; + int cqe_size; unsigned long cqn; u32 vector; atomic_t refcount; @@ -499,8 +507,8 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - int head; - int tail; + u16 head; + u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -509,13 +517,22 @@ struct hns_roce_uar_table { struct hns_roce_bitmap bitmap; }; +struct hns_roce_bank { + struct ida ida; + u32 inuse; /* Number of IDs allocated */ + u32 min; /* Lowest ID to allocate. */ + u32 max; /* Highest ID to allocate. */ + u32 next; /* Next ID to allocate. */ +}; + struct hns_roce_qp_table { - struct hns_roce_bitmap bitmap; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; + struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; + struct mutex bank_mutex; }; struct hns_roce_cq_table { @@ -535,17 +552,18 @@ struct hns_roce_raq_table { }; struct hns_roce_av { - u8 port; - u8 gid_index; - u8 stat_rate; - u8 hop_limit; - u32 flowlabel; - u8 sl; - u8 tclass; - u8 dgid[HNS_ROCE_GID_SIZE]; - u8 mac[ETH_ALEN]; - u16 vlan_id; - bool vlan_en; + u8 port; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + u32 flowlabel; + u16 udp_sport; + u8 sl; + u8 tclass; + u8 dgid[HNS_ROCE_GID_SIZE]; + u8 mac[ETH_ALEN]; + u16 vlan_id; + u8 vlan_en; }; struct hns_roce_ah { @@ -617,10 +635,9 @@ enum { struct hns_roce_work { struct hns_roce_dev *hr_dev; struct work_struct work; - u32 qpn; - u32 cqn; int event_type; int sub_type; + u32 queue_num; }; struct hns_roce_qp { @@ -655,6 +672,8 @@ struct hns_roce_qp { struct hns_roce_sge sge; u32 next_sge; + enum ib_mtu path_mtu; + u32 max_inline_data; /* 0: flush needed, 1: unneeded */ unsigned long flush_flag; @@ -678,35 +697,18 @@ enum { }; struct hns_roce_ceqe { - __le32 comp; + __le32 comp; + __le32 rsv[15]; }; struct hns_roce_aeqe { __le32 asyn; union { struct { - __le32 qp; - u32 rsv0; - u32 rsv1; - } qp_event; - - struct { - __le32 srq; - u32 rsv0; - u32 rsv1; - } srq_event; - - struct { - __le32 cq; + __le32 num; u32 rsv0; u32 rsv1; - } cq_event; - - struct { - __le32 ceqe; - u32 rsv0; - u32 rsv1; - } ce_event; + } queue_event; struct { __le64 out_param; @@ -715,6 +717,7 @@ struct hns_roce_aeqe { u8 rsv0; } __packed cmd; } event; + __le32 rsv[12]; }; struct hns_roce_eq { @@ -724,11 +727,11 @@ struct hns_roce_eq { int type_flag; /* Aeq:1 ceq:0 */ int eqn; u32 entries; - int log_entries; + u32 log_entries; int eqe_size; int irq; int log_page_size; - int cons_index; + u32 cons_index; struct hns_roce_buf_list *buf_list; int over_ignore; int coalesce; @@ -736,7 +739,7 @@ struct hns_roce_eq { int hop_num; struct hns_roce_mtr mtr; u16 eq_max_cnt; - int eq_period; + u32 eq_period; int shift; int event_type; int sub_type; @@ -759,8 +762,8 @@ struct hns_roce_caps { u32 max_sq_inline; u32 max_rq_sg; u32 max_extend_sg; - int 
num_qps; - int reserved_qps; + u32 num_qps; + u32 reserved_qps; int num_qpc_timer; int num_cqc_timer; int num_srqs; @@ -772,7 +775,7 @@ struct hns_roce_caps { u32 max_srq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int num_cqs; + u32 num_cqs; u32 max_cqes; u32 min_cqes; u32 min_wqes; @@ -781,7 +784,7 @@ struct hns_roce_caps { int num_aeq_vectors; int num_comp_vectors; int num_other_vectors; - int num_mtpts; + u32 num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; u32 num_srqwqe_segs; @@ -791,15 +794,15 @@ struct hns_roce_caps { int num_pds; int reserved_pds; u32 mtt_entry_sz; - u32 cq_entry_sz; + u32 cqe_sz; u32 page_size_cap; u32 reserved_lkey; int mtpt_entry_sz; - int qpc_entry_sz; + int qpc_sz; int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; - int sccc_entry_sz; + int sccc_sz; int qpc_timer_entry_sz; int cqc_timer_entry_sz; int srqc_entry_sz; @@ -809,6 +812,8 @@ struct hns_roce_caps { u32 pbl_hop_num; int aeqe_depth; int ceqe_depth; + u32 aeqe_size; + u32 ceqe_size; enum ib_mtu max_mtu; u32 qpc_bt_num; u32 qpc_timer_bt_num; @@ -817,6 +822,7 @@ struct hns_roce_caps { u32 cqc_timer_bt_num; u32 mpt_bt_num; u32 sccc_bt_num; + u32 gmv_bt_num; u32 qpc_ba_pg_sz; u32 qpc_buf_pg_sz; u32 qpc_hop_num; @@ -856,6 +862,11 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 gmv_entry_num; + u32 gmv_entry_sz; + u32 gmv_ba_pg_sz; + u32 gmv_buf_pg_sz; + u32 gmv_hop_num; u32 sl_num; u32 tsq_buf_pg_sz; u32 tpq_buf_pg_sz; @@ -890,7 +901,7 @@ struct hns_roce_hw { int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event); - int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); + int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned int timeout); int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); @@ -930,7 +941,7 @@ struct hns_roce_hw { int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_udata *udata); - void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); @@ -991,6 +1002,10 @@ struct hns_roce_dev { struct hns_roce_eq_table eq_table; struct hns_roce_hem_table qpc_timer_table; struct hns_roce_hem_table cqc_timer_table; + /* GMV is the memory area that the driver allocates for the hardware + * to store SGID, SMAC and VLAN information. 
+ */ + struct hns_roce_hem_table gmv_table; int cmd_mod; int loop_idc; @@ -1061,29 +1076,19 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } -static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, + unsigned int offset) { - if (buf->page_list) - return false; - - return true; + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + + (offset & ((1 << buf->trunk_shift) - 1)); } -static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) { - if (hns_roce_buf_is_direct(buf)) - return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & ((1 << buf->page_shift) - 1)); -} + unsigned int offset = idx << buf->page_shift; -static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) -{ - if (hns_roce_buf_is_direct(buf)) - return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); - else - return buf->page_list[idx].map; + return buf->trunk_list[offset >> buf->trunk_shift].map + + (offset & ((1 << buf->trunk_shift) - 1)); } #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) @@ -1124,6 +1129,14 @@ static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) return ilog2(to_hr_hem_entries_count(count, buf_shift)); } +#define DSCP_SHIFT 2 + +static inline u8 get_tclass(const struct ib_global_route *grh) +{ + return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ? + grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); @@ -1147,7 +1160,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt); + dma_addr_t *pages, unsigned int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); @@ -1178,18 +1191,22 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); struct ib_mr 
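/*
 * Editor's note: the new hns_roce_buf_offset()/hns_roce_buf_page() above
 * index a trunk_list instead of the old direct/page_list pair. The worked
 * example below just replays that bit arithmetic with assumed sizes
 * (trunk_shift = 16, i.e. 64 KB trunks, and page_shift = 12, i.e. 4 KB
 * pages); the real values are chosen by hns_roce_buf_alloc() at runtime.
 */
#include <stdio.h>

int main(void)
{
	const unsigned int trunk_shift = 16;	/* 64 KB trunks (assumed) */
	const unsigned int page_shift = 12;	/* 4 KB pages (assumed) */
	unsigned int offset = 0x12345;		/* byte offset into the buffer */
	unsigned int idx = 9;			/* page index into the buffer */
	unsigned int byte_off = idx << page_shift;

	/* hns_roce_buf_offset(): which trunk, and where inside it. */
	printf("offset 0x%x -> trunk %u, in-trunk offset 0x%x\n", offset,
	       offset >> trunk_shift, offset & ((1u << trunk_shift) - 1));

	/* hns_roce_buf_page(): page index -> byte offset -> trunk + offset. */
	printf("page %u -> trunk %u, in-trunk offset 0x%x\n", idx,
	       byte_off >> trunk_shift, byte_off & ((1u << trunk_shift) - 1));
	return 0;
}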
*hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, @@ -1200,13 +1217,12 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); -struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, - struct ib_udata *udata); +int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift); +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags); int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); @@ -1220,7 +1236,7 @@ int hns_roce_create_srq(struct ib_srq *srq, int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -1228,10 +1244,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n); -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq); enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, @@ -1247,7 +1263,7 @@ int to_hr_qp_type(int qp_type); int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); @@ -1261,7 +1277,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c8db6f8ae018..edc9d6b98d95 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,6 +75,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; break; + case HEM_TYPE_GMV: + hop_num = hr_dev->caps.gmv_hop_num; + break; default: return false; } @@ -183,8 +186,16 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_GMV: + mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.gmv_bt_num; + mhop->hop_num = hr_dev->caps.gmv_hop_num; + break; default: - dev_err(dev, "Table %d not support multi-hop addressing!\n", + dev_err(dev, "table %u not support multi-hop addressing!\n", type); return -EINVAL; } @@ -198,9 +209,9 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; u32 chunk_ba_num; + u32 chunk_size; u32 table_idx; u32 bt_num; - u32 chunk_size; if (get_hem_table_config(hr_dev, mhop, table->type)) return -EINVAL; @@ -232,8 +243,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->l0_idx = table_idx; break; default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (mhop->l0_idx >= mhop->ba_l0_num) @@ -332,15 +343,15 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, { spinlock_t *lock = &hr_dev->bt_cmd_lock; struct device *dev = hr_dev->dev; - long end; - unsigned long flags; struct hns_roce_hem_iter iter; void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; - __le32 bt_cmd_l = 0; - u64 bt_ba = 0; + unsigned long flags; + __le32 bt_cmd_l; int ret = 0; + u64 bt_ba; + long end; /* Find the HEM(Hardware Entry Memory) entry */ unsigned long i = (obj & (table->num_obj - 1)) / @@ -438,13 +449,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", + ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", + ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", table->type, index->buf, table->num_hem); return -EINVAL; } @@ -640,8 +651,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { struct device *dev = hr_dev->dev; - int ret = 0; unsigned long i; + int ret = 0; if (hns_roce_check_whether_mhop(hr_dev, table->type)) return hns_roce_table_mhop_get(hr_dev, table, obj); @@ -714,15 +725,15 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = hop_num; if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) - ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); + ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); if (index->inited & HEM_INDEX_L1) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); if (index->inited & HEM_INDEX_L0) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 
0.\n"); } } @@ -789,14 +800,14 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; - void *addr = NULL; unsigned long mhop_obj = obj; unsigned long obj_per_chunk; unsigned long idx_offset; int offset, dma_offset; + void *addr = NULL; + u32 hem_idx = 0; int length; int i, j; - u32 hem_idx = 0; if (!table->lowmem) return NULL; @@ -876,7 +887,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long buf_chunk_size; unsigned long bt_chunk_size; unsigned long bt_chunk_num; - unsigned long num_bt_l0 = 0; + unsigned long num_bt_l0; u32 hop_num; if (get_hem_table_config(hr_dev, &mhop, type)) @@ -966,8 +977,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, { struct hns_roce_hem_mhop mhop; u32 buf_chunk_size; - int i; u64 obj; + int i; if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) return; @@ -1017,7 +1028,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); @@ -1027,12 +1038,16 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + + if (hr_dev->caps.gmv_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); @@ -1234,7 +1249,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d, min %u!\n", offset, r->offset); return -EINVAL; } @@ -1298,8 +1313,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1404,8 +1419,8 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret = 0; int unit; + int ret; int i; if (region_cnt > HNS_ROCE_MAX_BT_REGION) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b34c940077bb..13fdeb3274e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -47,6 +47,7 @@ enum { HEM_TYPE_SCCC, HEM_TYPE_QPC_TIMER, HEM_TYPE_CQC_TIMER, + HEM_TYPE_GMV, /* UNMAP HEM */ HEM_TYPE_MTT, @@ -174,4 +175,4 @@ static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) return sg_dma_address(&iter->chunk->mem[iter->page_idx]); } -#endif /*_HNS_ROCE_HEM_H*/ +#endif /* _HNS_ROCE_HEM_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index aeb3a6fa7d47..f68585ff8e8a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -70,15 +70,15 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct hns_roce_qp *qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_sq_db sq_db = {}; - int ps_opcode = 0, i = 0; + int ps_opcode, i; unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; - u32 wqe_idx = 0; - int nreq = 0; int ret = 0; - u8 *smac; int loopback; + u32 wqe_idx; + int nreq; + u8 *smac; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -239,7 +239,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, break; } - /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* Ctrl field, ctrl set type: sig, solic, imm, fence */ /* SO wait for conforming application scenarios */ ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | @@ -271,7 +271,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; break; case IB_WR_LOCAL_INV: - break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_LSO: @@ -289,7 +288,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ret = -EINVAL; *bad_wr = wr; dev_err(dev, "inline len(1-%d)=%d, illegal", - ctrl->msg_length, + le32_to_cpu(ctrl->msg_length), hr_dev->caps.max_sq_inline); goto out; } @@ -301,7 +300,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); } else { - /*sqe num is two */ + /* sqe num is two */ for (i = 0; i < wr->num_sge; i++) set_data_seg(dseg + i, wr->sg_list + i); @@ -354,8 +353,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, unsigned long flags = 0; unsigned int wqe_idx; int ret = 0; - int nreq = 0; - int i = 0; + int nreq; + int i; u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -888,7 +887,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) u32 odb_ext_mod; u32 sdb_evt_mod; u32 odb_evt_mod; - int ret = 0; + int ret; memset(db, 0, sizeof(*db)); @@ -1148,8 +1147,8 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv = hr_dev->priv; struct hns_roce_raq_table *raq = &priv->raq_table; struct device *dev = &hr_dev->pdev->dev; - int raq_shift = 0; dma_addr_t addr; + int raq_shift; __le32 tmp; u32 val; int ret; @@ -1166,7 +1165,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) } raq->e_raq_buf->map = addr; - /* Configure raq extended address. 48bit 4K align*/ + /* Configure raq extended address. 
48bit 4K align */ roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); /* Configure raq_shift */ @@ -1360,7 +1359,7 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv = hr_dev->priv; struct hns_roce_free_mr *free_mr = &priv->free_mr; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret; free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); if (!free_mr->free_mr_wq) { @@ -1440,8 +1439,8 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { - int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; + int i; hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); @@ -1471,12 +1470,12 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA; caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ; - caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE; + caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE; caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE; caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE; caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE; caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE; - caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE; + caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE; caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT; caps->reserved_lkey = 0; caps->reserved_pds = 0; @@ -1640,10 +1639,10 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; - unsigned long end = 0; + unsigned long end; u32 status = 0; end = msecs_to_jiffies(timeout) + jiffies; @@ -1671,7 +1670,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, { unsigned long flags; u32 *p = NULL; - u8 gid_idx = 0; + u8 gid_idx; gid_idx = hns_get_gid_index(hr_dev, port, gid_index); @@ -1897,8 +1896,7 @@ static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(hr_cq->mtr.kmem, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -2064,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); } -static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -2307,7 +2300,7 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct hns_roce_qp *cur_qp = NULL; unsigned long flags; int npolled; - int ret = 0; + int ret; spin_lock_irqsave(&hr_cq->lock, flags); @@ -2445,7 +2438,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret; if (cur_state >= HNS_ROCE_QP_NUM_STATE || new_state >= HNS_ROCE_QP_NUM_STATE || @@ -2767,7 +2760,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else 
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -3263,6 +3255,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, @@ -3394,7 +3388,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_qp_context *context; - int tmp_qp_state = 0; + int tmp_qp_state; int ret = 0; int state; @@ -3572,7 +3566,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); @@ -3603,12 +3597,13 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) } wait_time++; } + return 0; } -static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) +static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not) { roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) | - (req_not << eq->log_entries), eq->doorbell); + (req_not << eq->log_entries), eq->doorbell); } static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev, @@ -3688,10 +3683,10 @@ static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev, int phy_port; int qpn; - qpn = roce_get_field(aeqe->event.qp_event.qp, + qpn = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); - phy_port = roce_get_field(aeqe->event.qp_event.qp, + phy_port = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); if (qpn <= 1) @@ -3722,9 +3717,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); + cqn = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -3775,8 +3770,7 @@ static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev, static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry) { - unsigned long off = (entry & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE; + unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE; return (struct hns_roce_aeqe *)((u8 *) (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + @@ -3800,7 +3794,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, int event_type; while ((aeqe = next_aeqe_sw_v1(eq))) { - /* Make sure we read the AEQ entry after we have checked the * ownership bit */ @@ -3855,12 +3848,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: hns_roce_v1_db_overflow_handle(hr_dev, aeqe); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - dev_warn(dev, "CEQ 0x%lx overflow.\n", - roce_get_field(aeqe->event.ce_event.ceqe, - 
HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); - break; default: dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); @@ -3881,8 +3868,7 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry) { - unsigned long off = (entry & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE; + unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE; return (struct hns_roce_ceqe *)((u8 *) (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + @@ -3906,7 +3892,6 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, u32 cqn; while ((ceqe = next_ceqe_sw_v1(eq))) { - /* Make sure we read CEQ entry after we have checked the * ownership bit */ @@ -3934,7 +3919,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work = 0; + int int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* CEQ irq routine, CEQ is pulse irq, not clear */ @@ -4132,9 +4117,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; struct device *dev = &hr_dev->pdev->dev; dma_addr_t tmp_dma_addr; - u32 eqconsindx_val = 0; - u32 eqcuridx_val = 0; - u32 eqshift_val = 0; + u32 eqcuridx_val; + u32 eqconsindx_val; + u32 eqshift_val; __le32 tmp2 = 0; __le32 tmp1 = 0; __le32 tmp = 0; @@ -4253,7 +4238,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) CEQ_REG_OFFSET * i; eq->entries = hr_dev->caps.ceqe_depth; eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; + eq->eqe_size = HNS_ROCE_CEQE_SIZE; } else { /* AEQ */ eq_table->eqc_base[i] = hr_dev->reg_base + @@ -4263,7 +4248,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) ROCEE_CAEP_AEQE_CONS_IDX_REG; eq->entries = hr_dev->caps.aeqe_depth; eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; + eq->eqe_size = HNS_ROCE_AEQE_SIZE; } } @@ -4350,7 +4335,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) static const struct ib_device_ops hns_roce_v1_dev_ops = { .destroy_qp = hns_roce_v1_destroy_qp, - .modify_cq = hns_roce_v1_modify_cq, .poll_cq = hns_roce_v1_poll_cq, .post_recv = hns_roce_v1_post_recv, .post_send = hns_roce_v1_post_send, @@ -4370,7 +4354,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, - .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 52307b2c7100..46ab0a321d21 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -68,13 +68,13 @@ #define HNS_ROCE_V1_COMP_EQE_NUM 0x8000 #define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400 -#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256 +#define HNS_ROCE_V1_QPC_SIZE 256 #define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 #define HNS_ROCE_V1_CQC_ENTRY_SIZE 64 #define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64 #define HNS_ROCE_V1_MTT_ENTRY_SIZE 64 -#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V1_CQE_SIZE 32 #define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 #define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17) @@ -419,7 +419,7 @@ struct hns_roce_wqe_data_seg { struct 
hns_roce_wqe_raddr_seg { __le32 rkey; - __le32 len;/* reserved */ + __le32 len; /* reserved */ __le64 raddr; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4cda95ed1fbe..833e1f259936 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -153,97 +153,196 @@ static void set_atomic_seg(const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } -static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind, unsigned int valid_num_sge) +static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + const struct ib_send_wr *wr, + unsigned int *sge_idx, u32 msg_len) +{ + struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; + unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg); + unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len; + unsigned int left_len_in_pg; + unsigned int idx = *sge_idx; + unsigned int i = 0; + unsigned int len; + void *addr; + void *dseg; + + if (msg_len > ext_sge_sz) { + ibdev_err(ibdev, + "no enough extended sge space for inline data.\n"); + return -EINVAL; + } + + dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg; + len = wr->sg_list[0].length; + addr = (void *)(unsigned long)(wr->sg_list[0].addr); + + /* When copying data to extended sge space, the left length in page may + * not long enough for current user's sge. So the data should be + * splited into several parts, one in the first page, and the others in + * the subsequent pages. + */ + while (1) { + if (len <= left_len_in_pg) { + memcpy(dseg, addr, len); + + idx += len / dseg_len; + + i++; + if (i >= wr->num_sge) + break; + + left_len_in_pg -= len; + len = wr->sg_list[i].length; + addr = (void *)(unsigned long)(wr->sg_list[i].addr); + dseg += len; + } else { + memcpy(dseg, addr, left_len_in_pg); + + len -= left_len_in_pg; + addr += left_len_in_pg; + idx += left_len_in_pg / dseg_len; + dseg = hns_roce_get_extend_sge(qp, + idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT; + } + } + + *sge_idx = idx; + + return 0; +} + +static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge, + unsigned int *sge_ind, unsigned int cnt) { struct hns_roce_v2_wqe_data_seg *dseg; - unsigned int cnt = valid_num_sge; - struct ib_sge *sge = wr->sg_list; unsigned int idx = *sge_ind; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - cnt -= HNS_ROCE_SGE_IN_WQE; - sge += HNS_ROCE_SGE_IN_WQE; - } - while (cnt > 0) { dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); - set_data_seg_v2(dseg, sge); - idx++; + if (likely(sge->length)) { + set_data_seg_v2(dseg, sge); + idx++; + cnt--; + } sge++; - cnt--; } *sge_ind = idx; } +static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + int mtu = ib_mtu_enum_to_int(qp->path_mtu); + + if (len > qp->max_inline_data || len > mtu) { + ibdev_err(&hr_dev->ib_dev, + "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n", + len, qp->max_inline_data, mtu); + return false; + } + + return true; +} + +static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + unsigned int *sge_idx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len); + struct 
ib_device *ibdev = &hr_dev->ib_dev; + unsigned int curr_idx = *sge_idx; + void *dseg = rc_sq_wqe; + unsigned int i; + int ret; + + if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { + ibdev_err(ibdev, "invalid inline parameters!\n"); + return -EINVAL; + } + + if (!check_inl_data_len(qp, msg_len)) + return -EINVAL; + + dseg += sizeof(struct hns_roce_v2_rc_send_wqe); + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); + + if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { + roce_set_bit(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); + + for (i = 0; i < wr->num_sge; i++) { + memcpy(dseg, ((void *)wr->sg_list[i].addr), + wr->sg_list[i].length); + dseg += wr->sg_list[i].length; + } + } else { + roce_set_bit(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); + + ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); + if (ret) + return ret; + + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, + curr_idx - *sge_idx); + } + + *sge_idx = curr_idx; + + return 0; +} + static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, unsigned int *sge_ind, unsigned int valid_num_sge) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); - void *wqe = dseg; int j = 0; int i; - if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { - if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) > - hr_dev->caps.max_sq_inline)) { - ibdev_err(ibdev, "inline len(1-%d)=%d, illegal", - rc_sq_wqe->msg_len, - hr_dev->caps.max_sq_inline); - return -EINVAL; - } + roce_set_field(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, + (*sge_ind) & (qp->sge.sge_cnt - 1)); - if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { - ibdev_err(ibdev, "Not support inline data!\n"); - return -EINVAL; - } + if (wr->send_flags & IB_SEND_INLINE) + return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); + if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { - memcpy(wqe, ((void *)wr->sg_list[i].addr), - wr->sg_list[i].length); - wqe += wr->sg_list[i].length; + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + } } - - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, - 1); } else { - if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { - for (i = 0; i < wr->num_sge; i++) { - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } + for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) { + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + j++; } - } else { - roce_set_field(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - - for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; - i++) { - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - j++; - } - } - - set_extend_sge(qp, wr, sge_ind, valid_num_sge); } - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + set_extend_sge(qp, wr->sg_list + i, sge_ind, + valid_num_sge - HNS_ROCE_SGE_IN_WQE); } + roce_set_field(rc_sq_wqe->byte_16, + 
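/*
 * Editor's note: set_rc_inl() above keeps small payloads inside the WQE and
 * hands anything larger than HNS_ROCE_V2_MAX_RC_INL_INN_SZ to
 * fill_ext_sge_inl_data(), which copies the data into the extended SGE area
 * and wraps at hardware page boundaries. The standalone model below shows
 * only that wrap: it assumes a 4 KB hardware page and ignores the SGE-index
 * wrap-around the driver also performs; sizes and offsets are made up.
 */
#include <stdio.h>
#include <string.h>

#define HW_PAGE_SIZE 4096u	/* stands in for 1 << HNS_HW_PAGE_SHIFT */

/* Copy len bytes starting at dst_off, spilling into the next page the way
 * fill_ext_sge_inl_data() does when the current page runs out of room. */
static void copy_split(unsigned char *dst, unsigned int dst_off,
		       const unsigned char *src, unsigned int len)
{
	while (len) {
		unsigned int left_in_pg = HW_PAGE_SIZE - (dst_off % HW_PAGE_SIZE);
		unsigned int chunk = len < left_in_pg ? len : left_in_pg;

		memcpy(dst + dst_off, src, chunk);
		printf("copied %u bytes at offset %u (%u left in page)\n",
		       chunk, dst_off, left_in_pg);
		dst_off += chunk;
		src += chunk;
		len -= chunk;
	}
}

int main(void)
{
	static unsigned char ext_sge_space[2 * HW_PAGE_SIZE];
	unsigned char payload[100] = { 0 };

	/* 40 bytes fit in the first page; the remaining 60 continue in the next. */
	copy_split(ext_sge_space, HW_PAGE_SIZE - 40, payload, sizeof(payload));
	return 0;
}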
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + return 0; } @@ -262,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %d!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -292,70 +391,97 @@ static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr, return valid_num; } +static __le32 get_immtdata(const struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); + default: + return 0; + } +} + +static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + const struct ib_send_wr *wr) +{ + u32 ib_op = wr->opcode; + + if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM) + return -EINVAL; + + ud_sq_wqe->immtdata = get_immtdata(wr); + + roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, + V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + + return 0; +} + +static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + struct hns_roce_ah *ah) +{ + struct ib_device *ib_dev = ah->ibah.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); + + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, + V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + + if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) + return -EINVAL; + + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + + ud_sq_wqe->sgid_index = ah->av.gid_index; + + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); + memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2); + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { - struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; - int valid_num_sge; + unsigned int valid_num_sge; u32 msg_len = 0; - bool loopback; - u8 *smac; + int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); - roce_set_field(ud_sq_wqe->byte_48, 
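/*
 * Editor's note: fill_ud_av() above now programs the UDP source port, hop
 * limit, traffic class and flow label straight from the hns_roce_av that
 * this patch extends with a udp_sport field. How udp_sport gets populated
 * is outside this hunk (it happens on the AH creation path, where the core
 * stack offers rdma_flow_label_to_udp_sport()). The sketch below only
 * mimics the general idea of folding a 20-bit flow label into the dynamic
 * UDP port range; it is NOT the kernel's exact formula.
 */
#include <stdio.h>

/* Illustrative fold of a flow label into 0xc000-0xffff for the RoCEv2
 * source port; real code should use the core helper instead. */
static unsigned short flow_label_to_sport(unsigned int flow_label)
{
	unsigned int folded = (flow_label ^ (flow_label >> 14)) & 0x3fff;

	return (unsigned short)(folded | 0xc000);
}

int main(void)
{
	printf("flow label 0x12345 -> udp sport 0x%x\n",
	       flow_label_to_sport(0x12345));
	return 0;
}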
V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - - /* MAC loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - - roce_set_field(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_OPCODE_M, - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_V2_WQE_OP_SEND); + ret = set_ud_opcode(ud_sq_wqe, wr); + if (WARN_ON(ret)) + return ret; ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - default: - ud_sq_wqe->immtdata = 0; - break; - } - - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + !!(wr->send_flags & IB_SEND_SIGNALED)); - /* Set se attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + !!(wr->send_flags & IB_SEND_SOLICITED)); roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); @@ -368,40 +494,73 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, curr_idx & (qp->sge.sge_cnt - 1)); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, - V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); - - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 1 : 0); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) + return ret; - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); - set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. 
+ */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); *sge_idx = curr_idx; + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return 0; } +static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + const struct ib_send_wr *wr) +{ + u32 ib_op = wr->opcode; + + rc_sq_wqe->immtdata = get_immtdata(wr); + + switch (ib_op) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); + break; + case IB_WR_REG_MR: + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + break; + case IB_WR_LOCAL_INV: + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); + fallthrough; + case IB_WR_SEND_WITH_INV: + rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); + break; + default: + return -EINVAL; + } + + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + + return 0; +} static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -411,25 +570,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, unsigned int curr_idx = *sge_idx; unsigned int valid_num_sge; u32 msg_len = 0; - int ret = 0; + int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - case IB_WR_SEND_WITH_INV: - rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); - break; - default: - rc_sq_wqe->immtdata = 0; - break; - } + ret = set_rc_opcode(rc_sq_wqe, wr); + if (WARN_ON(ret)) + return ret; roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); @@ -440,36 +590,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 
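/*
 * Editor's note: the comment block above explains why the owner bit is now
 * written last: the hardware may fetch WQEs ahead of the doorbell, so every
 * other field and any extended SGEs must be globally visible before the bit
 * that hands the WQE over flips. The toy below shows that producer-side
 * ordering with a C11 release fence standing in for dma_wmb(); the struct
 * layout is invented for the example and is not the real WQE format.
 */
#include <stdatomic.h>
#include <string.h>

struct toy_wqe {
	unsigned char payload[60];
	unsigned int flags;		/* bit 0 plays the role of the owner bit */
};

static void post_toy_wqe(struct toy_wqe *wqe, const void *data, size_t len,
			 unsigned int owner_bit)
{
	memcpy(wqe->payload, data, len);	/* fill everything else first */

	/* dma_wmb() in the driver: make the payload stores visible before
	 * the owner-bit store so the device never sees a half-written WQE. */
	atomic_thread_fence(memory_order_release);

	wqe->flags = owner_bit;			/* hand the WQE to the hardware */
}

int main(void)
{
	static struct toy_wqe wqe;
	const char msg[] = "hello";

	post_toy_wqe(&wqe, msg, sizeof(msg), 1);
	return 0;
}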
1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); - - switch (wr->opcode) { - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); - rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); - break; - case IB_WR_LOCAL_INV: - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); - rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); - break; - case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); - rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); - break; - default: - break; - } - - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, - to_hr_opcode(wr->opcode)); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -477,7 +597,18 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return ret; } @@ -525,7 +656,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, unsigned int sge_idx; unsigned int wqe_idx; void *wqe = NULL; - int nreq; + u32 nreq; int ret; spin_lock_irqsave(&qp->sq.lock, flags); @@ -549,7 +680,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", wr->num_sge, qp->sq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -562,7 +693,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); else if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); @@ -634,7 +765,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -703,7 +834,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n) return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); @@ -744,12 +875,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; unsigned long flags; + unsigned int ind; __le32 *srq_idx; int ret 
= 0; int wqe_idx; void *wqe; int nreq; - int ind; int i; spin_lock_irqsave(&srq->lock, flags); @@ -894,8 +1025,8 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - unsigned long instance_stage; /* the current instance stage */ - unsigned long reset_stage; /* the current reset stage */ + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ unsigned long reset_cnt; bool sw_resetting; bool hw_resetting; @@ -994,7 +1125,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); } else { @@ -1002,7 +1133,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); } @@ -1449,6 +1580,10 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) PF_RES_DATA_4_PF_SCCC_BT_NUM_M, PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num, + PF_RES_DATA_5_PF_GMV_BT_NUM_M, + PF_RES_DATA_5_PF_GMV_BT_NUM_S); + return 0; } @@ -1682,7 +1817,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ; caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; - caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; + caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; @@ -1690,7 +1825,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ; - caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; caps->reserved_pds = 0; @@ -1739,6 +1874,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; @@ -1760,19 +1897,35 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_buf_pg_sz = 0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ; caps->sccc_ba_pg_sz = 0; caps->sccc_buf_pg_sz = 0; caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->qpc_sz 
= HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / + caps->gmv_entry_sz); + } } -static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, - int *buf_page_size, int *bt_page_size, u32 hem_type) +static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num, + u32 *buf_page_size, u32 *bt_page_size, u32 hem_type) { u64 obj_per_chunk; - int bt_chunk_size = 1 << PAGE_SHIFT; - int buf_chunk_size = 1 << PAGE_SHIFT; - int obj_per_chunk_default = buf_chunk_size / obj_size; + u64 bt_chunk_size = PAGE_SIZE; + u64 buf_chunk_size = PAGE_SIZE; + u64 obj_per_chunk_default = buf_chunk_size / obj_size; *buf_page_size = 0; *bt_page_size = 0; @@ -1797,8 +1950,8 @@ static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, obj_per_chunk = ctx_bt_num * obj_per_chunk_default; break; default: - pr_err("Table %d not support hop_num = %d!\n", hem_type, - hop_num); + pr_err("table %u not support hop_num = %u!\n", hem_type, + hop_num); return; } @@ -1855,7 +2008,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; - caps->cq_entry_sz = resp_a->cq_entry_sz; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; caps->irrl_entry_sz = resp_b->irrl_entry_sz; @@ -1863,9 +2016,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqc_entry_sz = resp_b->cqc_entry_sz; caps->srqc_entry_sz = resp_b->srqc_entry_sz; caps->idx_entry_sz = resp_b->idx_entry_sz; - caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz; + caps->sccc_sz = resp_b->sccc_sz; caps->max_mtu = resp_b->max_mtu; - caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz); + caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->min_cqes = resp_b->min_cqes; caps->min_wqes = resp_b->min_wqes; caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap); @@ -1958,6 +2111,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; caps->mtt_ba_pg_sz = 0; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; @@ -1981,7 +2136,23 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); - calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num, + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); + } + + calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, 
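/*
 * Editor's note: the HIP09 branch above sizes the GID table from the GMV
 * backing store: gmv_entry_num = gmv_bt_num * (PAGE_SIZE / gmv_entry_sz),
 * and gid_table_len[0] repeats the division with HNS_HW_PAGE_SIZE. The
 * arithmetic below assumes one BT page, a 4 KB page size and a 32-byte GMV
 * entry (the value HNS_ROCE_V3_GMV_ENTRY_SZ is believed to hold, defined
 * outside this hunk); all three numbers are assumptions for illustration.
 */
#include <stdio.h>

int main(void)
{
	unsigned int gmv_bt_num = 1;	/* BT pages reported by firmware (assumed) */
	unsigned int page_size = 4096;	/* PAGE_SIZE == HNS_HW_PAGE_SIZE here (assumed) */
	unsigned int gmv_entry_sz = 32;	/* assumed SGID/SMAC/VLAN entry size */

	unsigned int gmv_entry_num = gmv_bt_num * (page_size / gmv_entry_sz);
	unsigned int gid_table_len = gmv_bt_num * (page_size / gmv_entry_sz);

	/* One 4 KB BT page of 32-byte entries gives 128 GID/MAC/VLAN slots. */
	printf("gmv_entry_num=%u gid_table_len[0]=%u\n",
	       gmv_entry_num, gid_table_len);
	return 0;
}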
caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz, HEM_TYPE_QPC); calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num, @@ -1998,7 +2169,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + calc_pg_sz(caps->num_qps, caps->sccc_sz, caps->sccc_hop_num, caps->sccc_bt_num, &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC); @@ -2018,6 +2189,56 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_config_qpc_size(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_entry_size *cfg_size = + (struct hns_roce_cfg_entry_size *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE, + false); + + cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_QPC_SIZE); + cfg_size->size = cpu_to_le32(hr_dev->caps.qpc_sz); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_config_sccc_size(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_entry_size *cfg_size = + (struct hns_roce_cfg_entry_size *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE, + false); + + cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_SCCC_SIZE); + cfg_size->size = cpu_to_le32(hr_dev->caps.sccc_sz); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev) +{ + int ret; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return 0; + + ret = hns_roce_config_qpc_size(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_config_sccc_size(hr_dev); + if (ret) + dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret); + + return ret; +} + static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) { struct hns_roce_caps *caps = &hr_dev->caps; @@ -2090,9 +2311,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) } ret = hns_roce_v2_set_bt(hr_dev); - if (ret) - dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", - ret); + if (ret) { + dev_err(hr_dev->dev, + "Configure bt attribute fail, ret = %d.\n", ret); + return ret; + } + + /* Configure the size of QPC, SCCC, etc. 
*/ + ret = hns_roce_config_entry_size(hr_dev); return ret; } @@ -2173,10 +2399,10 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, u32 buf_chk_sz; dma_addr_t t; int func_num = 1; - int pg_num_a; - int pg_num_b; - int pg_num; - int size; + u32 pg_num_a; + u32 pg_num_b; + u32 pg_num; + u32 size; int i; switch (type) { @@ -2225,7 +2451,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, if (i < (pg_num - 1)) entry[i].blk_ba1_nxt_ptr |= (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; - } link_tbl->npages = pg_num; link_tbl->pg_sz = buf_chk_sz; @@ -2267,24 +2492,13 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, link_tbl->table.map); } -static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +static int get_hem_table(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - int qpc_count, cqc_count; - int ret, i; - - /* TSQ includes SQ doorbell and ack doorbell */ - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); - return ret; - } - - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); - goto err_tpq_init_failed; - } + unsigned int qpc_count; + unsigned int cqc_count; + unsigned int gmv_count; + int ret; + int i; /* Alloc memory for QPC Timer buffer space chunk */ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; @@ -2308,8 +2522,23 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) } } + /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */ + for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num; + gmv_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count); + if (ret) { + dev_err(hr_dev->dev, + "failed to get gmv table, ret = %d.\n", ret); + goto err_gmv_failed; + } + } + return 0; +err_gmv_failed: + for (i = 0; i < gmv_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i); + err_cqc_timer_failed: for (i = 0; i < cqc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); @@ -2318,6 +2547,34 @@ err_qpc_timer_failed: for (i = 0; i < qpc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + return ret; +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + ret = get_hem_table(hr_dev); + if (ret) + goto err_get_hem_table_failed; + + return 0; + +err_get_hem_table_failed: hns_roce_free_link_table(hr_dev, &priv->tpq); err_tpq_init_failed: @@ -2341,7 +2598,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc; struct hns_roce_mbox_status *mb_st = (struct hns_roce_mbox_status *)desc.data; - enum hns_roce_cmd_return_status status; + int status; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); @@ -2412,7 +2669,7 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; unsigned long end; @@ -2439,14 
+2696,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, - int gid_index, const union ib_gid *gid, - enum hns_roce_sgid_type sgid_type) +static void copy_gid(void *dest, const union ib_gid *gid) +{ +#define GID_SIZE 4 + const union ib_gid *src = gid; + __le32 (*p)[GID_SIZE] = dest; + int i; + + if (!gid) + src = &zgid; + + for (i = 0; i < GID_SIZE; i++) + (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]); +} + +static int config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_sgid_tb *sgid_tb = (struct hns_roce_cfg_sgid_tb *)desc.data; - u32 *p; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); @@ -2455,19 +2725,54 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); - p = (u32 *)&gid->raw[0]; - sgid_tb->vf_sgid_l = cpu_to_le32(*p); + copy_gid(&sgid_tb->vf_sgid_l, gid); - p = (u32 *)&gid->raw[4]; - sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + return hns_roce_cmq_send(hr_dev, &desc, 1); +} - p = (u32 *)&gid->raw[8]; - sgid_tb->vf_sgid_mh = cpu_to_le32(*p); +static int config_gmv_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type, + const struct ib_gid_attr *attr) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_gmv_tb_a *tb_a = + (struct hns_roce_cfg_gmv_tb_a *)desc[0].data; + struct hns_roce_cfg_gmv_tb_b *tb_b = + (struct hns_roce_cfg_gmv_tb_b *)desc[1].data; - p = (u32 *)&gid->raw[0xc]; - sgid_tb->vf_sgid_h = cpu_to_le32(*p); + u16 vlan_id = VLAN_CFI_MASK; + u8 mac[ETH_ALEN] = {}; + int ret; - return hns_roce_cmq_send(hr_dev, &desc, 1); + if (gid) { + ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac); + if (ret) + return ret; + } + + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false); + + copy_gid(&tb_a->vf_sgid_l, gid); + + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, + CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); + roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, + vlan_id < VLAN_CFI_MASK); + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, + CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); + roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, + CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); + + roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, + CFG_GMV_TB_SGID_IDX_S, gid_index); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, @@ -2477,23 +2782,24 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; int ret; - if (!gid || !attr) - return -EINVAL; - - if (attr->gid_type == IB_GID_TYPE_ROCE) - sgid_type = GID_TYPE_FLAG_ROCE_V1; - - if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - if (ipv6_addr_v4mapped((void *)gid)) - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; - else - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + if (gid) { + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + if (ipv6_addr_v4mapped((void *)gid)) + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; + else + 
sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + } else if (attr->gid_type == IB_GID_TYPE_ROCE) { + sgid_type = GID_TYPE_FLAG_ROCE_V1; + } } - ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr); + else + ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to configure sgid table, ret = %d!\n", + ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n", ret); return ret; @@ -2738,6 +3044,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1); roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); @@ -2757,11 +3064,10 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(hr_cq->mtr.kmem, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); } -static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) +static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n) { struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe); @@ -2858,6 +3164,13 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); + roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M, + V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == + HNS_ROCE_V3_CQE_SIZE ? 
1 : 0); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context, CQC_STASH); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, @@ -3025,7 +3338,8 @@ out: } static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, - struct hns_roce_v2_cqe *cqe, struct ib_wc *wc) + struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe, + struct ib_wc *wc) { static const struct { u32 cqe_status; @@ -3066,7 +3380,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, - sizeof(*cqe), false); + cq->cqe_size, false); /* * For hns ROCEE, GENERAL_ERR is an error type that is not defined in @@ -3100,7 +3414,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int is_send; u16 wqe_ctr; u32 opcode; - int qpn; + u32 qpn; int ret; /* Find cqe according to consumer index */ @@ -3163,7 +3477,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, ++wq->tail; } - get_cqe_status(hr_dev, *cur_qp, cqe, wc); + get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc); if (unlikely(wc->status != IB_WC_SUCCESS)) return 0; @@ -3369,7 +3683,7 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, break; default: dev_warn(hr_dev->dev, - "Table %d not to be written by mailbox!\n", type); + "table %u not to be written by mailbox!\n", type); return -EINVAL; } @@ -3380,9 +3694,25 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, u32 hem_type, int step_idx) { struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_gmv_bt *gmv_bt = + (struct hns_roce_cfg_gmv_bt *)desc.data; int ret; int op; + if (hem_type == HEM_TYPE_GMV) { + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, + false); + + gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT); + gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT + + 32)); + gmv_bt->gmv_bt_idx = cpu_to_le32(obj / + (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz)); + + return hns_roce_cmq_send(hr_dev, &desc, 1); + } + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); if (op < 0) return 0; @@ -3480,24 +3810,20 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; - case HEM_TYPE_SCCC: - case HEM_TYPE_QPC_TIMER: - case HEM_TYPE_CQC_TIMER: - break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + case HEM_TYPE_GMV: + return 0; default: - dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", + dev_warn(dev, "table %u not to be destroyed by mailbox!\n", table->type); return 0; } - if (table->type == HEM_TYPE_SCCC || - table->type == HEM_TYPE_QPC_TIMER || - table->type == HEM_TYPE_CQC_TIMER) - return 0; - op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -3514,16 +3840,21 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask, struct hns_roce_qp *hr_qp) { struct hns_roce_cmd_mailbox *mailbox; + int qpc_size; int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - memcpy(mailbox->buf, context, sizeof(*context) * 2); + /* The qpc size of HIP08 is only 
256B, which is half of HIP09 */ + qpc_size = hr_dev->caps.qpc_sz; + memcpy(mailbox->buf, context, qpc_size); + memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size); ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, HNS_ROCE_CMD_MODIFY_QPC, @@ -3641,14 +3972,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_76_SRQ_EN_S, 1); } - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, - V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4); - roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); - hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); + + if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) + return; + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(&context->ext, QPCEX_STASH); } static void modify_qp_init_to_init(struct ib_qp *ibqp, @@ -3669,51 +4002,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } else { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } - roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, @@ -3954,6 +4242,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu mtu; + u8 lp_pktn_ini; u8 port_num; u64 *mtts; u8 *dmac; @@ -4052,6 +4341,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_52_DMAC_S, 0); mtu = get_mtu(ibqp, attr); + hr_qp->path_mtu = mtu; if (attr_mask & IB_QP_PATH_MTU) { roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, @@ -4061,13 +4351,21 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, } #define MAX_LP_MSG_LEN 65536 - /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */ + /* MTU * (2 ^ LP_PKTN_INI) 
shouldn't be bigger than 64KB */ + lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)); + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, - ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu))); + V2_QPC_BYTE_56_LP_PKTN_INI_S, lp_pktn_ini); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */ + roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, lp_pktn_ini); + roce_set_field(qpc_mask->byte_172_sq_psn, + V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, @@ -4113,7 +4411,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { - ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); + ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; } @@ -4164,6 +4462,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) +{ + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + return rdma_flow_label_to_udp_sport(fl); +} + static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4198,7 +4504,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan_id < VLAN_N_VID) { + /* Only HIP08 needs to set the vlan_en bits in QPC */ + if (vlan_id < VLAN_N_VID && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4227,7 +4535,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, - is_udp ? 0x12b7 : 0); + is_udp ? 
get_udp_sport(grh->flow_label, ibqp->qp_num, + attr->dest_qp_num) : 0); roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, 0); @@ -4244,26 +4553,30 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (is_udp) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); - else - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh)); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S, grh->flow_label); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S, 0); memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) { + ibdev_err(ibdev, + "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n", + hr_qp->sl, MAX_SERVICE_LEVEL); + return -EINVAL; + } + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); + V2_QPC_BYTE_28_SL_S, hr_qp->sl); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); return 0; } @@ -4309,7 +4622,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - memset(qpc_mask, 0, sizeof(*qpc_mask)); + memset(qpc_mask, 0, hr_dev->caps.qpc_sz); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -4526,14 +4839,18 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, unsigned long rq_flag = 0; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* * In v2 engine, software pass context and context mask to hardware * when modifying qp. If software need modify some fields in context, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. 
*/ - memset(context, 0, sizeof(*context)); - memset(qpc_mask, 0xff, sizeof(*qpc_mask)); + memset(context, 0, hr_dev->caps.qpc_sz); + memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz); + ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state, new_state, context, qpc_mask); if (ret) @@ -4583,9 +4900,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_60_QP_ST_S, 0); /* SW pass context to HW */ - ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp); + ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); + ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; } @@ -4646,7 +4963,7 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, if (ret) goto out; - memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); + memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz); out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -4678,7 +4995,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); + ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; goto out; } @@ -4757,9 +5074,11 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S); qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn, - V2_QPC_BYTE_212_RETRY_CNT_M, - V2_QPC_BYTE_212_RETRY_CNT_S); - qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer); + V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S); + qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S); done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -4775,6 +5094,7 @@ done: } qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits; out: mutex_unlock(&hr_qp->mutex); @@ -4790,13 +5110,15 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, unsigned long flags; int ret = 0; - if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { + if ((hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UD) && + hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d\n", + "failed to modify QP to RST, ret = %d.\n", ret); } @@ -4835,7 +5157,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP 0x%06lx, ret = %d\n", + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -4858,7 +5180,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret); goto out; } @@ -4868,7 +5190,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, clr->qpn = cpu_to_le32(hr_qp->qpn); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to 
clear SCC ctx, ret = %d.\n", ret); goto out; } @@ -5004,6 +5326,10 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, struct hns_roce_cmd_mailbox *mailbox; int ret; + /* Resizing SRQs is not supported yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + if (srq_attr_mask & IB_SRQ_LIMIT) { if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; @@ -5113,7 +5439,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d\n", + "failed to process cmd when modifying CQ, ret = %d.\n", ret); return ret; @@ -5124,8 +5450,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work) struct hns_roce_work *irq_work = container_of(work, struct hns_roce_work, work); struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; - u32 qpn = irq_work->qpn; - u32 cqn = irq_work->cqn; switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5141,15 +5465,15 @@ static void hns_roce_irq_work_handle(struct work_struct *work) break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", - qpn); + irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: ibdev_warn(ibdev, "SRQ limit reach.\n"); @@ -5161,10 +5485,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_err(ibdev, "SRQ catas error.\n"); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn); + ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn); + ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); @@ -5180,8 +5504,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) } static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - u32 qpn, u32 cqn) + struct hns_roce_eq *eq, u32 queue_num) { struct hns_roce_work *irq_work; @@ -5191,10 +5514,9 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; - irq_work->qpn = qpn; - irq_work->cqn = cqn; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; + irq_work->queue_num = queue_num; queue_work(hr_dev->irq_workq, &(irq_work->work)); } @@ -5233,7 +5555,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) aeqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE); + eq->eqe_size); return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? 
aeqe : NULL; @@ -5246,10 +5568,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); int aeqe_found = 0; int event_type; + u32 queue_num; int sub_type; - u32 srqn; - u32 qpn; - u32 cqn; while (aeqe) { /* Make sure we read AEQ entry after we have checked the @@ -5263,15 +5583,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, HNS_ROCE_V2_AEQE_SUB_TYPE_S); - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - srqn = roce_get_field(aeqe->event.srq_event.srq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + queue_num = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5282,17 +5596,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_qp_event(hr_dev, qpn, event_type); + hns_roce_qp_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, srqn, event_type); + hns_roce_srq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, cqn, event_type); - break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + hns_roce_cq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -5300,8 +5612,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: case HNS_ROCE_EVENT_TYPE_FLR: break; default: @@ -5318,7 +5629,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, if (eq->cons_index > (2 * eq->entries - 1)) eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); } @@ -5333,7 +5644,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) ceqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE); + eq->eqe_size); + return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? 
ceqe : NULL; } @@ -5374,7 +5686,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work = 0; + int int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ @@ -5609,14 +5921,16 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX); - /* set nex_eqe_ba[43:12] */ - roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, + roce_set_field(eqc->byte_40, HNS_ROCE_EQC_NXT_EQE_BA_L_M, HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12); - /* set nex_eqe_ba[63:44] */ - roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, + roce_set_field(eqc->byte_44, HNS_ROCE_EQC_NXT_EQE_BA_H_M, HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44); + roce_set_field(eqc->byte_44, HNS_ROCE_EQC_EQE_SIZE_M, + HNS_ROCE_EQC_EQE_SIZE_S, + eq->eqe_size == HNS_ROCE_V3_EQE_SIZE ? 1 : 0); + return 0; } @@ -5807,7 +6121,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq_cmd = HNS_ROCE_CMD_CREATE_CEQC; eq->type_flag = HNS_ROCE_CEQ; eq->entries = hr_dev->caps.ceqe_depth; - eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; + eq->eqe_size = hr_dev->caps.ceqe_size; eq->irq = hr_dev->irq[i + other_num + aeq_num]; eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL; @@ -5816,7 +6130,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq_cmd = HNS_ROCE_CMD_CREATE_AEQC; eq->type_flag = HNS_ROCE_AEQ; eq->entries = hr_dev->caps.aeqe_depth; - eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; + eq->eqe_size = hr_dev->caps.aeqe_size; eq->irq = hr_dev->irq[i - comp_num + other_num]; eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL; @@ -5950,6 +6264,7 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, /* required last entry */ {0, } }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index ac29be43b6bd..bdaccf86460d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -44,6 +44,7 @@ #define HNS_ROCE_VF_SMAC_NUM 32 #define HNS_ROCE_VF_SGID_NUM 32 #define HNS_ROCE_VF_SL_NUM 8 +#define HNS_ROCE_VF_GMV_BT_NUM 256 #define HNS_ROCE_V2_MAX_QP_NUM 0x100000 #define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 @@ -60,6 +61,7 @@ #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 +#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_PHY_UAR_NUM 1 #define HNS_ROCE_V2_MAX_IRQ_NUM 65 @@ -77,7 +79,6 @@ #define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 #define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16 #define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 -#define HNS_ROCE_V2_QPC_ENTRY_SZ 256 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 @@ -86,8 +87,11 @@ #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_IDX_ENTRY_SZ 4 -#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 -#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 + +#define HNS_ROCE_V2_SCCC_SZ 32 +#define HNS_ROCE_V3_SCCC_SZ 64 +#define 
HNS_ROCE_V3_GMV_ENTRY_SZ 32 + #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 @@ -229,6 +233,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406, HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408, + HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_POST_MB = 0x8504, @@ -238,6 +243,8 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, + HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, + HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -260,23 +267,24 @@ enum hns_roce_sgid_type { }; struct hns_roce_v2_cq_context { - __le32 byte_4_pg_ceqn; - __le32 byte_8_cqn; - __le32 cqe_cur_blk_addr; - __le32 byte_16_hop_addr; - __le32 cqe_nxt_blk_addr; - __le32 byte_24_pgsz_addr; - __le32 byte_28_cq_pi; - __le32 byte_32_cq_ci; - __le32 cqe_ba; - __le32 byte_40_cqe_ba; - __le32 byte_44_db_record; - __le32 db_record_addr; - __le32 byte_52_cqe_cnt; - __le32 byte_56_cqe_period_maxcnt; - __le32 cqe_report_timer; - __le32 byte_64_se_cqe_idx; + __le32 byte_4_pg_ceqn; + __le32 byte_8_cqn; + __le32 cqe_cur_blk_addr; + __le32 byte_16_hop_addr; + __le32 cqe_nxt_blk_addr; + __le32 byte_24_pgsz_addr; + __le32 byte_28_cq_pi; + __le32 byte_32_cq_ci; + __le32 cqe_ba; + __le32 byte_40_cqe_ba; + __le32 byte_44_db_record; + __le32 db_record_addr; + __le32 byte_52_cqe_cnt; + __le32 byte_56_cqe_period_maxcnt; + __le32 cqe_report_timer; + __le32 byte_64_se_cqe_idx; }; + #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -309,6 +317,9 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_8_CQN_S 0 #define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) +#define V2_CQC_BYTE_8_CQE_SIZE_S 27 +#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27) + #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0 #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0) @@ -350,6 +361,10 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) + +#define CQC_STASH CQC_FIELD_LOC(63, 63) + struct hns_roce_srq_context { __le32 byte_4_srqn_srqst; __le32 byte_8_limit_wl; @@ -434,7 +449,7 @@ struct hns_roce_srq_context { #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) -enum{ +enum { V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, }; @@ -451,67 +466,72 @@ enum hns_roce_v2_qp_state { HNS_ROCE_QP_NUM_ST }; +struct hns_roce_v2_qp_context_ex { + __le32 data[64]; +}; struct hns_roce_v2_qp_context { - __le32 byte_4_sqpn_tst; - __le32 wqe_sge_ba; - __le32 byte_12_sq_hop; - __le32 byte_16_buf_ba_pg_sz; - __le32 byte_20_smac_sgid_idx; - __le32 byte_24_mtu_tc; - __le32 byte_28_at_fl; - u8 dgid[GID_LEN_V2]; - __le32 dmac; - __le32 byte_52_udpspn_dmac; - __le32 byte_56_dqpn_err; - __le32 byte_60_qpst_tempid; - __le32 qkey_xrcd; - __le32 byte_68_rq_db; - __le32 rq_db_record_addr; - __le32 byte_76_srqn_op_en; - __le32 byte_80_rnr_rx_cqn; - __le32 byte_84_rq_ci_pi; - __le32 rq_cur_blk_addr; - __le32 byte_92_srq_info; - __le32 byte_96_rx_reqmsn; - __le32 rq_nxt_blk_addr; - __le32 byte_104_rq_sge; - __le32 byte_108_rx_reqepsn; - __le32 rq_rnr_timer; - __le32 rx_msg_len; - __le32 rx_rkey_pkt_info; - __le64 rx_va; - __le32 byte_132_trrl; - __le32 trrl_ba; - 
__le32 byte_140_raq; - __le32 byte_144_raq; - __le32 byte_148_raq; - __le32 byte_152_raq; - __le32 byte_156_raq; - __le32 byte_160_sq_ci_pi; - __le32 sq_cur_blk_addr; - __le32 byte_168_irrl_idx; - __le32 byte_172_sq_psn; - __le32 byte_176_msg_pktn; - __le32 sq_cur_sge_blk_addr; - __le32 byte_184_irrl_idx; - __le32 cur_sge_offset; - __le32 byte_192_ext_sge; - __le32 byte_196_sq_psn; - __le32 byte_200_sq_max; - __le32 irrl_ba; - __le32 byte_208_irrl; - __le32 byte_212_lsn; - __le32 sq_timer; - __le32 byte_220_retry_psn_msn; - __le32 byte_224_retry_msg; - __le32 rx_sq_cur_blk_addr; - __le32 byte_232_irrl_sge; - __le32 irrl_cur_sge_offset; - __le32 byte_240_irrl_tail; - __le32 byte_244_rnr_rxack; - __le32 byte_248_ack_psn; - __le32 byte_252_err_txcqn; - __le32 byte_256_sqflush_rqcqe; + __le32 byte_4_sqpn_tst; + __le32 wqe_sge_ba; + __le32 byte_12_sq_hop; + __le32 byte_16_buf_ba_pg_sz; + __le32 byte_20_smac_sgid_idx; + __le32 byte_24_mtu_tc; + __le32 byte_28_at_fl; + u8 dgid[GID_LEN_V2]; + __le32 dmac; + __le32 byte_52_udpspn_dmac; + __le32 byte_56_dqpn_err; + __le32 byte_60_qpst_tempid; + __le32 qkey_xrcd; + __le32 byte_68_rq_db; + __le32 rq_db_record_addr; + __le32 byte_76_srqn_op_en; + __le32 byte_80_rnr_rx_cqn; + __le32 byte_84_rq_ci_pi; + __le32 rq_cur_blk_addr; + __le32 byte_92_srq_info; + __le32 byte_96_rx_reqmsn; + __le32 rq_nxt_blk_addr; + __le32 byte_104_rq_sge; + __le32 byte_108_rx_reqepsn; + __le32 rq_rnr_timer; + __le32 rx_msg_len; + __le32 rx_rkey_pkt_info; + __le64 rx_va; + __le32 byte_132_trrl; + __le32 trrl_ba; + __le32 byte_140_raq; + __le32 byte_144_raq; + __le32 byte_148_raq; + __le32 byte_152_raq; + __le32 byte_156_raq; + __le32 byte_160_sq_ci_pi; + __le32 sq_cur_blk_addr; + __le32 byte_168_irrl_idx; + __le32 byte_172_sq_psn; + __le32 byte_176_msg_pktn; + __le32 sq_cur_sge_blk_addr; + __le32 byte_184_irrl_idx; + __le32 cur_sge_offset; + __le32 byte_192_ext_sge; + __le32 byte_196_sq_psn; + __le32 byte_200_sq_max; + __le32 irrl_ba; + __le32 byte_208_irrl; + __le32 byte_212_lsn; + __le32 sq_timer; + __le32 byte_220_retry_psn_msn; + __le32 byte_224_retry_msg; + __le32 rx_sq_cur_blk_addr; + __le32 byte_232_irrl_sge; + __le32 irrl_cur_sge_offset; + __le32 byte_240_irrl_tail; + __le32 byte_244_rnr_rxack; + __le32 byte_248_ack_psn; + __le32 byte_252_err_txcqn; + __le32 byte_256_sqflush_rqcqe; + + struct hns_roce_v2_qp_context_ex ext; }; #define V2_QPC_BYTE_4_TST_S 0 @@ -880,6 +900,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) +#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l) + +#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82) + #define V2_QP_RWE_S 1 /* rdma write enable */ #define V2_QP_RRE_S 2 /* rdma read enable */ #define V2_QP_ATE_S 3 /* rdma atomic enable */ @@ -896,6 +920,7 @@ struct hns_roce_v2_cqe { u8 smac[4]; __le32 byte_28; __le32 byte_32; + __le32 rsv[8]; }; #define V2_CQE_BYTE_4_OPCODE_S 0 @@ -1065,12 +1090,13 @@ struct hns_roce_v2_ud_send_wqe { __le32 byte_32; __le32 byte_36; __le32 byte_40; - __le32 dmac; - __le32 byte_48; + u8 dmac[ETH_ALEN]; + u8 sgid_index; + u8 smac_index; u8 dgid[GID_LEN_V2]; - }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 + +#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 @@ -1109,37 +1135,10 @@ struct hns_roce_v2_ud_send_wqe { #define V2_UD_SEND_WQE_BYTE_40_SL_S 20 #define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) -#define 
V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 -#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) - #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 -#define V2_UD_SEND_WQE_DMAC_0_S 0 -#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_DMAC_1_S 8 -#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_DMAC_2_S 16 -#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_DMAC_3_S 24 -#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16 -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24 -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24) - struct hns_roce_v2_rc_send_wqe { __le32 byte_4; __le32 msg_len; @@ -1187,6 +1186,8 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) +#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 + struct hns_roce_wqe_frmr_seg { __le32 pbl_size; __le32 mode_buf_pg_sz; @@ -1324,7 +1325,7 @@ struct hns_roce_pf_res_b { __le32 sgid_idx_num; __le32 qid_idx_sl_num; __le32 sccc_bt_idx_num; - __le32 rsv; + __le32 gmv_idx_num; }; #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 @@ -1351,6 +1352,12 @@ struct hns_roce_pf_res_b { #define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) +#define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0 +#define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8 +#define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8) + struct hns_roce_pf_timer_res_a { __le32 rsv0; __le32 qpc_timer_bt_idx_num; @@ -1415,7 +1422,7 @@ struct hns_roce_vf_res_b { __le32 vf_sgid_idx_num; __le32 vf_qid_idx_sl_num; __le32 vf_sccc_idx_num; - __le32 rsv1; + __le32 vf_gmv_idx_num; }; #define VF_RES_B_DATA_0_VF_ID_S 0 @@ -1445,6 +1452,12 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0 +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16 +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16) + struct hns_roce_vf_switch { __le32 rocee_sel; __le32 fun_id; @@ -1537,6 +1550,18 @@ struct hns_roce_cfg_sgid_tb { __le32 vf_sgid_h; __le32 vf_sgid_type_rsv; }; + +enum { + HNS_ROCE_CFG_QPC_SIZE = BIT(0), + HNS_ROCE_CFG_SCCC_SIZE = BIT(1), +}; + +struct hns_roce_cfg_entry_size { + __le32 type; + __le32 rsv[4]; + __le32 size; +}; + #define CFG_SGID_TB_TABLE_IDX_S 0 #define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) @@ -1555,6 +1580,46 @@ struct hns_roce_cfg_smac_tb { #define CFG_SMAC_TB_VF_SMAC_H_S 0 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +struct hns_roce_cfg_gmv_bt { + __le32 gmv_ba_l; + __le32 gmv_ba_h; + __le32 gmv_bt_idx; + __le32 rsv[3]; +}; + +#define CFG_GMV_BA_H_S 0 +#define CFG_GMV_BA_H_M GENMASK(19, 0) + +struct hns_roce_cfg_gmv_tb_a { + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_vlan; + __le32 resv; +}; + +#define CFG_GMV_TB_SGID_IDX_S 0 +#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) + +#define CFG_GMV_TB_VF_SGID_TYPE_S 0 +#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +#define CFG_GMV_TB_VF_VLAN_EN_S 2 + 
+#define CFG_GMV_TB_VF_VLAN_ID_S 16 +#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) + +struct hns_roce_cfg_gmv_tb_b { + __le32 vf_smac_l; + __le32 vf_smac_h; + __le32 table_idx_rsv; + __le32 resv[3]; +}; + +#define CFG_GMV_TB_SMAC_H_S 0 +#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { u8 number_ports; @@ -1571,7 +1636,7 @@ struct hns_roce_query_pf_caps_a { u8 max_sq_desc_sz; u8 max_rq_desc_sz; u8 max_srq_desc_sz; - u8 cq_entry_sz; + u8 cqe_sz; }; struct hns_roce_query_pf_caps_b { @@ -1581,9 +1646,9 @@ struct hns_roce_query_pf_caps_b { u8 cqc_entry_sz; u8 srqc_entry_sz; u8 idx_entry_sz; - u8 scc_ctx_entry_sz; + u8 sccc_sz; u8 max_mtu; - __le16 qpc_entry_sz; + __le16 qpc_sz; __le16 qpc_timer_entry_sz; __le16 cqc_timer_entry_sz; u8 min_cqes; @@ -1639,7 +1704,7 @@ struct hns_roce_query_pf_caps_d { __le32 rsv_uars_rsv_qps; }; #define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0 -#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(20, 0) +#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0) #define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20 #define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20) @@ -1777,8 +1842,8 @@ struct hns_roce_eq_context { __le32 byte_28; __le32 byte_32; __le32 byte_36; - __le32 nxt_eqe_ba0; - __le32 nxt_eqe_ba1; + __le32 byte_40; + __le32 byte_44; __le32 rsv[5]; }; @@ -1920,6 +1985,9 @@ struct hns_roce_eq_context { #define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0 #define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0) +#define HNS_ROCE_EQC_EQE_SIZE_S 20 +#define HNS_ROCE_EQC_EQE_SIZE_M GENMASK(21, 20) + #define HNS_ROCE_V2_CEQE_COMP_CQN_S 0 #define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0) @@ -1941,6 +2009,8 @@ struct hns_roce_eq_context { #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) +#define MAX_SERVICE_LEVEL 0x7 + struct hns_roce_wqe_atomic_seg { __le64 fetchadd_swap_data; __le64 cmp_data; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5907cfd878a6..d9179bae4989 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -33,13 +33,13 @@ #include <linux/acpi.h> #include <linux/of_platform.h> #include <linux/module.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "hns_roce_common.h" #include "hns_roce_device.h" -#include <rdma/hns-abi.h> #include "hns_roce_hem.h" /** @@ -53,7 +53,7 @@ * GID[0][0], GID[1][0],.....GID[N - 1][0], * And so on */ -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) { return gid_index * hr_dev->caps.num_ports + port; } @@ -61,7 +61,10 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; - u32 i = 0; + u32 i; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) return 0; @@ -90,14 +93,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); - struct ib_gid_attr zattr = {}; u8 port = attr->port_num - 1; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, 
&zgid, &zattr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); return ret; } @@ -141,8 +143,8 @@ static int hns_roce_netdev_event(struct notifier_block *self, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct hns_roce_ib_iboe *iboe = NULL; struct hns_roce_dev *hr_dev = NULL; - u8 port = 0; - int ret = 0; + int ret; + u8 port; hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); iboe = &hr_dev->iboe; @@ -323,7 +325,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_init(&context->page_mutex); } - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + resp.cqe_size = hr_dev->caps.cqe_sz; + + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto error_fail_copy_to_udata; @@ -419,6 +424,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, + .create_user_ah = hns_roce_create_ah, .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, @@ -454,6 +460,8 @@ static const struct ib_device_ops hns_roce_dev_mr_ops = { static const struct ib_device_ops hns_roce_dev_mw_ops = { .alloc_mw = hns_roce_alloc_mw, .dealloc_mw = hns_roce_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw), }; static const struct ib_device_ops hns_roce_dev_frmr_ops = { @@ -487,36 +495,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->phys_port_cnt = hr_dev->caps.num_ports; ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_cmd_mask = - (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_REG_MR) | - (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | - (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); - } /* MW */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); - } /* FRMR */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) @@ -524,12 +509,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* SRQ */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); } @@ -545,7 +524,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - ret = 
ib_register_device(ib_dev, "hns_%d"); + dma_set_max_seg_size(dev, UINT_MAX); + ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); return ret; @@ -575,8 +555,8 @@ error_failed_setup_mtu_mac: static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, @@ -587,7 +567,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, - HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz, + HEM_TYPE_QPC, hr_dev->caps.qpc_sz, hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, "Failed to init QP context memory, aborting.\n"); @@ -626,7 +606,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } - if (hr_dev->caps.srqc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, @@ -638,11 +618,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, - hr_dev->caps.sccc_entry_sz, + hr_dev->caps.sccc_sz, hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, @@ -675,18 +655,35 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } + if (hr_dev->caps.gmv_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, + HEM_TYPE_GMV, + hr_dev->caps.gmv_entry_sz, + hr_dev->caps.gmv_entry_num, 1); + if (ret) { + dev_err(dev, + "failed to init gmv table memory, ret = %d\n", + ret); + goto err_unmap_cqc_timer; + } + } + return 0; +err_unmap_cqc_timer: + if (hr_dev->caps.cqc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); + err_unmap_qpc_timer: if (hr_dev->caps.qpc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); err_unmap_cq: @@ -716,8 +713,8 @@ err_unmap_dmpt: */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); @@ -841,8 +838,8 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) int hns_roce_init(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, true); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index e5df3884b41d..1bcffd93ff3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -167,10 +167,10 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - int ret; unsigned long mtpt_idx = key_to_hw_index(mr->key); - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + int ret; /* Allocate mailbox memory */ mailbox = 
hns_roce_alloc_cmd_mailbox(hr_dev); @@ -185,14 +185,14 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, else ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { - dev_err(dev, "Write mtpt fail!\n"); + dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); + dev_err(dev, "failed to create mpt, ret = %d.\n", ret); goto err_page; } @@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, return ret; } -int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ib_dev = &hr_dev->ib_dev; @@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, int ret; if (!mr->enabled) - return -EINVAL; + return ERR_PTR(-EINVAL); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, @@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return 0; + return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; + return ERR_PTR(ret); } int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -495,7 +496,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); if (ret < 1) { - ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); goto err_page_list; } @@ -509,7 +510,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); ret = 0; } else { - mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); + mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); ret = mr->npages; } @@ -589,28 +590,22 @@ err_table: return ret; } -struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, - struct ib_udata *udata) +int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device); - struct hns_roce_mw *mw; + struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device); + struct hns_roce_mw *mw = to_hr_mw(ibmw); unsigned long index = 0; int ret; - mw = kmalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) - return ERR_PTR(-ENOMEM); - /* Allocate a key for mw from bitmap */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); if (ret) - goto err_bitmap; + return ret; mw->rkey = hw_index_to_key(index); - mw->ibmw.rkey = mw->rkey; - mw->ibmw.type = type; - mw->pdn = to_hr_pd(ib_pd)->pdn; + ibmw->rkey = mw->rkey; + mw->pdn = to_hr_pd(ibmw->pd)->pdn; mw->pbl_hop_num = hr_dev->caps.pbl_hop_num; mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; @@ -619,15 +614,11 
@@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, if (ret) goto err_mw; - return &mw->ibmw; + return 0; err_mw: hns_roce_mw_free(hr_dev, mw); - -err_bitmap: - kfree(mw); - - return ERR_PTR(ret); + return ret; } int hns_roce_dealloc_mw(struct ib_mw *ibmw) @@ -636,8 +627,6 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) struct hns_roce_mw *mw = to_hr_mw(ibmw); hns_roce_mw_free(hr_dev, mw); - kfree(mw); - return 0; } @@ -707,28 +696,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline int mtr_umem_page_count(struct ib_umem *umem, - unsigned int page_shift) -{ - int count = ib_umem_page_count(umem); - - if (page_shift >= PAGE_SHIFT) - count >>= page_shift - PAGE_SHIFT; - else - count <<= PAGE_SHIFT - page_shift; - - return count; -} - -static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, - unsigned int page_shift) -{ - if (is_direct) - return ALIGN(alloc_size, 1 << page_shift); - else - return HNS_HW_DIRECT_PAGE_COUNT << page_shift; -} - /* * check the given pages in continuous address space * Returns 0 on success, or the error page num. @@ -757,7 +724,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) /* release kernel buffers */ if (mtr->kmem) { hns_roce_buf_free(hr_dev, mtr->kmem); - kfree(mtr->kmem); mtr->kmem = NULL; } } @@ -767,63 +733,62 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int max_pg_shift = buf_attr->page_shift; - unsigned int best_pg_shift = 0; + unsigned int best_pg_shift; int all_pg_count = 0; - size_t direct_size; size_t total_size; - unsigned long tmp; - int ret = 0; + int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { - ibdev_err(ibdev, "Failed to check mtr size\n"); + ibdev_err(ibdev, "failed to check mtr size\n."); return -EINVAL; } if (udata) { + unsigned long pgsz_bitmap; + unsigned long page_size; + mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR_OR_NULL(mtr->umem)) { - ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) { - best_pg_shift = max_pg_shift; - } else { - tmp = GENMASK(max_pg_shift, 0); - ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); - best_pg_shift = (ret <= PAGE_SIZE) ? 
- PAGE_SHIFT : ilog2(ret); - } - all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + if (buf_attr->fixed_page) + pgsz_bitmap = 1 << buf_attr->page_shift; + else + pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); + + page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, + user_addr); + if (!page_size) + return -EINVAL; + best_pg_shift = order_base_2(page_size); + all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); ret = 0; } else { mtr->umem = NULL; - mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); - if (!mtr->kmem) { - ibdev_err(ibdev, "Failed to alloc kmem\n"); - return -ENOMEM; - } - direct_size = mtr_kmem_direct_size(is_direct, total_size, - max_pg_shift); - ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, max_pg_shift); - if (ret) { - ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); - goto err_alloc_mem; - } else { - best_pg_shift = max_pg_shift; - all_pg_count = mtr->kmem->npages; + mtr->kmem = + hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR(mtr->kmem)) { + ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", + PTR_ERR(mtr->kmem)); + return PTR_ERR(mtr->kmem); } + + best_pg_shift = buf_attr->page_shift; + all_pg_count = mtr->kmem->npages; } /* must bigger than minimum hardware page shift */ if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { ret = -EINVAL; - ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + ibdev_err(ibdev, + "failed to check mtr, page shift = %u count = %d.\n", best_pg_shift, all_pg_count); goto err_alloc_mem; } @@ -864,12 +829,12 @@ static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt) + dma_addr_t *pages, unsigned int page_cnt) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; + unsigned int i; int err; - int i; /* * Only use the first page address as root ba when hopnum is 0, this @@ -885,7 +850,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (r->offset + r->count > page_cnt) { err = -EINVAL; ibdev_err(ibdev, - "Failed to check mtr%d end %d + %d, max %d\n", + "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); return err; } @@ -893,7 +858,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); if (err) { ibdev_err(ibdev, - "Failed to map mtr%d offset %d, err %d\n", + "failed to map mtr%u offset %u, ret = %d.\n", i, r->offset, err); return err; } @@ -906,13 +871,12 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int mtt_count, left; int start_index; - int mtt_count; int total = 0; __le64 *mtts; - int npage; + u32 npage; u64 addr; - int left; if (!mtt_buf || mtt_max < 1) goto done; @@ -967,7 +931,7 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, unsigned int *buf_page_shift) { struct hns_roce_buf_region *r; - unsigned int page_shift = 0; + unsigned int page_shift; int page_cnt = 0; size_t buf_size; int region_cnt; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b10c50b8736e..cca818d05a8f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,7 
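
The user-buffer path in mtr_alloc_bufs() above stops open-coding the page-shift math and instead builds a page-size bitmap and asks the core for the best supported DMA block size. A minimal, hedged sketch of that selection pattern follows; the function and parameter names are illustrative, not the driver's, and it assumes max_shift >= PAGE_SHIFT.

#include <rdma/ib_umem.h>

/*
 * Hedged sketch (not the driver's code): choose a DMA block size for a
 * pinned umem.  max_shift is the largest page shift the HW supports;
 * 'fixed' forces exactly that size instead of letting the core pick.
 */
static int pick_umem_page_shift(struct ib_umem *umem, u64 va,
				unsigned int max_shift, bool fixed,
				unsigned int *shift, size_t *nblocks)
{
	unsigned long bitmap, pgsz;

	/* Either exactly 1 << max_shift, or anything from PAGE_SIZE up. */
	bitmap = fixed ? BIT(max_shift) : GENMASK(max_shift, PAGE_SHIFT);

	pgsz = ib_umem_find_best_pgsz(umem, bitmap, va);
	if (!pgsz)
		return -EINVAL;	/* buffer layout not representable */

	*shift = order_base_2(pgsz);
	*nblocks = ib_umem_num_dma_blocks(umem, pgsz);
	return 0;
}

Returning 0 from ib_umem_find_best_pgsz() is the only failure case the caller needs to handle, which is why the converted driver code can drop the old fixed-page/else branches.
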
+32,6 @@ #include <linux/platform_device.h> #include <linux/pci.h> -#include <uapi/rdma/hns-abi.h> #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) @@ -65,26 +64,28 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); + ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret); return ret; } if (udata) { - struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; + struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn}; - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); - ibdev_err(ib_dev, "failed to copy to udata\n"); - return -EFAULT; + ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret); } } - return 0; + return ret; } -void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); + return 0; } int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index c063c450c715..1116371adf74 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,9 +39,6 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include <rdma/hns-abi.h> - -#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) static void flush_work_handle(struct work_struct *work) { @@ -116,8 +113,8 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, enum hns_roce_event type) { - struct ib_event event; struct ib_qp *ibqp = &hr_qp->ibqp; + struct ib_event event; if (ibqp->event_handler) { event.device = ibqp->device; @@ -156,9 +153,50 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, + unsigned long *qpn) +{ + int id; + + id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL); + if (id < 0) { + id = ida_alloc_range(&bank->ida, bank->min, bank->max, + GFP_KERNEL); + if (id < 0) + return id; + } + + /* the QPN should keep increasing until the max value is reached. */ + bank->next = (id + 1) > bank->max ? 
bank->min : id + 1; + + /* the lower 3 bits is bankid */ + *qpn = (id << 3) | bankid; + + return 0; +} static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; + u8 bankid; int ret; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { @@ -171,13 +209,21 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, - 1, 1, &num); + mutex_lock(&qp_table->bank_mutex); + bankid = get_least_load_bankid_for_qp(qp_table->bank); + + ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, + &num); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); - return -ENOMEM; + ibdev_err(&hr_dev->ib_dev, + "failed to alloc QPN, ret = %d\n", ret); + mutex_unlock(&qp_table->bank_mutex); + return ret; } + qp_table->bank[bankid].inuse++; + mutex_unlock(&qp_table->bank_mutex); + hr_qp->doorbell_qpn = (u32)num; } @@ -288,7 +334,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); @@ -342,9 +388,15 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); } +static inline u8 get_qp_bankid(unsigned long qpn) +{ + /* The lower 3 bits of QPN are used to hash to different banks */ + return (u8)(qpn & GENMASK(2, 0)); +} + static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + u8 bankid; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) return; @@ -352,7 +404,13 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->qpn < hr_dev->caps.reserved_qps) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); + bankid = get_qp_bankid(hr_qp->qpn); + + ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + + mutex_lock(&hr_dev->qp_table.bank_mutex); + hr_dev->qp_table.bank[bankid].inuse--; + mutex_unlock(&hr_dev->qp_table.bank_mutex); } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, @@ -406,37 +464,43 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return 0; } -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, - struct hns_roce_qp *hr_qp, - struct ib_qp_cap *cap) +static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) { - u32 cnt; + /* GSI/UD QP only has extended sge */ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return qp->sq.max_gs; - cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->sq.max_gs = roundup_pow_of_two(cnt); - hr_qp->sge.sge_cnt = 0; + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; - return 0; - } + return 0; +} + +static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) +{ + u32 total_sge_cnt; + u32 wqe_sge_cnt; - hr_qp->sq.max_gs = cnt; + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - /* UD sqwqe's sge use extend sge */ - if (hr_qp->ibqp.qp_type == IB_QPT_GSI || - hr_qp->ibqp.qp_type == IB_QPT_UD) { - cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); - } else if 
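
The QPN allocator introduced above spreads QPs across IDA-backed banks and encodes the bank in the low three bits of the QPN, so allocation pressure is balanced and frees can find the right bank without a lookup. A simplified, hedged sketch of that round-robin-with-wrap pattern is below; the structure mirrors the patch but the type and field set is trimmed for illustration.

#include <linux/idr.h>

struct qp_bank {		/* trimmed illustration of the patch's bank */
	struct ida ida;
	u32 min;		/* lowest id this bank may hand out */
	u32 max;		/* highest id this bank may hand out */
	u32 next;		/* preferred start point for the next alloc */
};

/* Allocate an id from one bank and fold the bank number into the QPN. */
static int bank_alloc_qpn(struct qp_bank *bank, u8 bankid, unsigned long *qpn)
{
	int id;

	/* Prefer ids at or above 'next' so QPNs keep increasing ... */
	id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
	if (id < 0)	/* ... then wrap to the bottom of the range. */
		id = ida_alloc_range(&bank->ida, bank->min, bank->max,
				     GFP_KERNEL);
	if (id < 0)
		return id;

	bank->next = (id + 1) > bank->max ? bank->min : id + 1;
	*qpn = ((unsigned long)id << 3) | bankid;	/* low 3 bits = bank */
	return 0;
}

static void bank_free_qpn(struct qp_bank *banks, unsigned long qpn)
{
	ida_free(&banks[qpn & GENMASK(2, 0)].ida, qpn >> 3);
}

The two-step ida_alloc_range() call is what gives the "keep increasing until the max value is reached" behaviour noted in the patch comment: new QPNs monotonically grow until the range is exhausted, then recycling starts from the bottom.
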
(hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { - cnt = roundup_pow_of_two(sq_wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - } else { - cnt = 0; + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; + return; } - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - hr_qp->sge.sge_cnt = cnt; + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); - return 0; + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + if (wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + hr_qp->sge.sge_cnt = max(total_sge_cnt, + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + } } static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, @@ -449,12 +513,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); + ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", + ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n", cap->max_send_sge); return -EINVAL; } @@ -481,9 +545,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return ret; } - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; @@ -548,20 +610,18 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, { struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; - int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || - cap->max_send_sge > hr_dev->caps.max_sq_sg || - cap->max_inline_data > hr_dev->caps.max_sq_inline) { + cap->max_send_sge > hr_dev->caps.max_sq_sg) { ibdev_err(ibdev, - "failed to check SQ WR, SGE or inline num, ret = %d.\n", + "failed to check SQ WR or SGE num, ret = %d.\n", -EINVAL); return -EINVAL; } cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); if (cnt > hr_dev->caps.max_wqes) { - ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n", cnt); return -EINVAL; } @@ -569,17 +629,12 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); hr_qp->sq.wqe_cnt = cnt; - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; cap->max_send_sge = hr_qp->sq.max_gs; - /* We don't support inline sends for kernel QPs (yet) */ - cap->max_inline_data = 0; - return 0; } @@ -731,13 +786,17 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; int ret; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; + if (udata) { if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, - "Failed to map user SQ doorbell\n"); + "failed to map user SQ doorbell, ret = %d.\n", + ret); goto err_out; } hr_qp->en_flags |= 
HNS_ROCE_QP_CAP_SQ_RECORD_DB; @@ -749,7 +808,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, - "Failed to map user RQ doorbell\n"); + "failed to map user RQ doorbell, ret = %d.\n", + ret); goto err_sdb; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; @@ -766,7 +826,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { ibdev_err(ibdev, - "Failed to alloc kernel RQ doorbell\n"); + "failed to alloc kernel RQ doorbell, ret = %d.\n", + ret); goto err_out; } *hr_qp->rdb.db_record = 0; @@ -809,14 +870,14 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(sq_wrid)) { - ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); + ibdev_err(ibdev, "failed to alloc SQ wrid.\n"); return -ENOMEM; } if (hr_qp->rq.wqe_cnt) { rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(rq_wrid)) { - ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); + ibdev_err(ibdev, "failed to alloc RQ wrid.\n"); ret = -ENOMEM; goto err_sq; } @@ -847,6 +908,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->ibqp.qp_type = init_attr->qp_type; + if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline) + init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline; + + hr_qp->max_inline_data = init_attr->cap.max_inline_data; + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; else @@ -861,29 +927,25 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } if (udata) { - if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { - ibdev_err(ibdev, "Failed to copy QP ucmd\n"); - return -EFAULT; + ret = ib_copy_from_udata(ucmd, udata, + min(udata->inlen, sizeof(*ucmd))); + if (ret) { + ibdev_err(ibdev, + "failed to copy QP ucmd, ret = %d\n", ret); + return ret; } ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) - ibdev_err(ibdev, "Failed to set user SQ size\n"); + ibdev_err(ibdev, + "failed to set user SQ size, ret = %d.\n", + ret); } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - ibdev_err(ibdev, "Failed to check multicast loopback\n"); - return -EINVAL; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); - return -EINVAL; - } - ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) - ibdev_err(ibdev, "Failed to set kernel SQ size\n"); + ibdev_err(ibdev, + "failed to set kernel SQ size, ret = %d.\n", + ret); } return ret; @@ -907,47 +969,53 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + if (init_attr->create_flags) + return -EOPNOTSUPP; + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { - ibdev_err(ibdev, "Failed to set QP param\n"); + ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); return ret; } if (!udata) { ret = alloc_kernel_wrid(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc wrid\n"); + ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", + ret); return ret; } } ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); + ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", + ret); goto err_wrid; } ret 
= alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP buffer\n"); + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); goto err_db; } ret = alloc_qpn(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QPN\n"); + ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_buf; } ret = alloc_qpc(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP context\n"); + ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n", + ret); goto err_qpn; } ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to store QP\n"); + ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret); goto err_qpc; } @@ -1004,6 +1072,30 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, kfree(hr_qp); } +static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, + bool is_user) +{ + switch (type) { + case IB_QPT_UD: + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && + is_user) + goto out; + fallthrough; + case IB_QPT_RC: + case IB_QPT_GSI: + break; + default: + goto out; + } + + return 0; + +out: + ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type); + + return -EOPNOTSUPP; +} + struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -1013,52 +1105,26 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct hns_roce_qp *hr_qp; int ret; - switch (init_attr->qp_type) { - case IB_QPT_RC: { - hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); - if (!hr_qp) - return ERR_PTR(-ENOMEM); - - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp); - if (ret) { - ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", - hr_qp->qpn, ret); - kfree(hr_qp); - return ERR_PTR(ret); - } - - break; - } - case IB_QPT_GSI: { - /* Userspace is not allowed to create special QPs: */ - if (udata) { - ibdev_err(ibdev, "not support usr space GSI\n"); - return ERR_PTR(-EINVAL); - } + ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); + if (ret) + return ERR_PTR(ret); - hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); - if (!hr_qp) - return ERR_PTR(-ENOMEM); + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) + return ERR_PTR(-ENOMEM); + if (init_attr->qp_type == IB_QPT_GSI) { hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; + } - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp); - if (ret) { - ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_qp); - return ERR_PTR(ret); - } + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); + if (ret) { + ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", + init_attr->qp_type, ret); - break; - } - default:{ - ibdev_err(ibdev, "not support QP type %d\n", - init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); - } + kfree(hr_qp); + return ERR_PTR(ret); } return &hr_qp->ibqp; @@ -1113,9 +1179,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { - ibdev_err(&hr_dev->ib_dev, - "attr port_num invalid.attr->port_num=%d\n", - attr->port_num); + ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n", + attr->port_num); return -EINVAL; } @@ -1123,8 +1188,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, p = attr_mask & IB_QP_PORT ? 
(attr->port_num - 1) : hr_qp->port; if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { ibdev_err(&hr_dev->ib_dev, - "attr pkey_index invalid.attr->pkey_index=%d\n", - attr->pkey_index); + "invalid attr, pkey_index = %u.\n", + attr->pkey_index); return -EINVAL; } } @@ -1132,16 +1197,16 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", - attr->max_rd_atomic); + "invalid attr, max_rd_atomic = %u.\n", + attr->max_rd_atomic); return -EINVAL; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", - attr->max_dest_rd_atomic); + "invalid attr, max_dest_rd_atomic = %u.\n", + attr->max_dest_rd_atomic); return -EINVAL; } @@ -1161,8 +1226,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, mutex_lock(&hr_qp->mutex); - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; + if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) + goto out; + + cur_state = hr_qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (ibqp->uobject && @@ -1264,22 +1331,22 @@ static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq; @@ -1300,22 +1367,25 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - int reserved_from_top = 0; - int reserved_from_bot; - int ret; + unsigned int reserved_from_bot; + unsigned int i; mutex_init(&qp_table->scc_mutex); + mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; - ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, - hr_dev->caps.num_qps - 1, reserved_from_bot, - reserved_from_top); - if (ret) { - dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", - ret); - return ret; + for (i = 0; i < reserved_from_bot; i++) { + hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++; + hr_dev->qp_table.bank[get_qp_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + ida_init(&hr_dev->qp_table.bank[i].ida); + hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps / + HNS_ROCE_QP_BANK_NUM - 1; + hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min; } return 0; @@ -1323,5 +1393,8 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) void 
hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) + ida_destroy(&hr_dev->qp_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index b9e2dbd372b6..c4ae57e4173a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -4,7 +4,6 @@ */ #include <rdma/ib_umem.h> -#include <rdma/hns-abi.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" @@ -93,7 +92,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); return -ENOBUFS; } @@ -101,32 +101,34 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, ARRAY_SIZE(mtts_idx), &dma_handle_idx); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ number, ret = %d.\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); goto err_out; } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { ret = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); + ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); goto err_xa; } @@ -137,7 +139,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); goto err_xa; } @@ -198,7 +200,8 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.srqwqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) - ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", err); return err; } @@ -229,18 +232,18 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) { - ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ idx mtr, ret = %d.\n", err); return err; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { - ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); err = -ENOMEM; goto err_idx_mtr; } - } return 0; @@ -285,9 +288,13 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_srq 
*srq = to_hr_srq(ib_srq); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_srq ucmd = {}; - int ret = 0; + int ret; u32 cqn; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Check the actual SRQ wqe and SRQ sge num */ if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) @@ -300,9 +307,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max_gs = init_attr->attr.max_sge; if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } @@ -310,20 +318,21 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ buffer, ret = %d.\n", ret); return ret; } ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); goto err_buf_alloc; } if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", ret); goto err_idx_alloc; } @@ -335,7 +344,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ context, ret = %d.\n", ret); goto err_wrid_alloc; } @@ -343,11 +353,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp)))) { - ret = -EFAULT; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) goto err_srqc_alloc; - } } return 0; @@ -363,7 +372,7 @@ err_buf_alloc: return ret; } -void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -372,6 +381,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) free_srq_idx(hr_dev, srq); free_srq_wrid(srq); free_srq_buf(hr_dev, srq); + return 0; } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 25747b85a79c..6a79502c8b53 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -274,7 +274,6 @@ struct i40iw_device { u8 max_sge; u8 iw_status; u8 send_term_ok; - bool push_mode; /* Initialized from parameter passed to driver */ /* x710 specific */ struct mutex pbl_mutex; @@ -409,8 +408,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp) } /* i40iw.c */ -void i40iw_add_ref(struct ib_qp *); -void i40iw_rem_ref(struct ib_qp *); +void i40iw_qp_add_ref(struct ib_qp *ibqp); +void i40iw_qp_rem_ref(struct ib_qp *ibqp); struct ib_qp *i40iw_get_qp(struct ib_device *, int); void i40iw_flush_wqes(struct i40iw_device *iwdev, @@ -554,9 +553,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, bool wait); 
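
Several of the hns verbs above (alloc_ucontext, alloc_pd, the QP ucmd copy and create_srq) switch to clamping the udata copy length with min(udata->inlen/outlen, sizeof(...)), which keeps older and newer user-space providers interoperable when the ABI structs grow. A minimal sketch of that request/response pattern follows; the struct and function names are hypothetical.

#include <rdma/ib_verbs.h>

struct drv_create_req {		/* hypothetical request; old libs may send less */
	__aligned_u64 buf_addr;
};

struct drv_create_resp {	/* hypothetical response; old libs may expect less */
	__u32 handle;
	__u32 reserved;
};

static int drv_handle_udata(struct ib_udata *udata,
			    struct drv_create_req *req,
			    struct drv_create_resp *resp)
{
	int ret;

	/* Copy only as much as userspace actually provided. */
	ret = ib_copy_from_udata(req, udata, min(udata->inlen, sizeof(*req)));
	if (ret)
		return ret;

	/* ... fill *resp from the allocated object ... */

	/* Likewise, never write past the buffer userspace handed us. */
	return ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
}

Propagating the ib_copy_*_udata() return value, as the converted create_srq and alloc_pd paths now do, also replaces the old blanket -EFAULT with the core's actual error code.
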
void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf); void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp); -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num); +void i40iw_free_qp_resources(struct i40iw_qp *iwqp); + enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev, struct i40iw_dma_mem *memptr, u32 size, u32 mask); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index a3b95805c154..9acc0ecc9a43 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) iwqp = cm_node->iwqp; if (iwqp) { iwqp->cm_node = NULL; - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); cm_node->iwqp = NULL; } else if (cm_node->qhash_set) { i40iw_get_addr_info(cm_node, &nfo); @@ -2426,7 +2426,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node, } break; case I40IW_CM_STATE_MPAREQ_RCVD: - atomic_add_return(1, &cm_node->passive_state); + atomic_inc(&cm_node->passive_state); break; case I40IW_CM_STATE_ESTABLISHED: case I40IW_CM_STATE_SYN_RCVD: @@ -3020,7 +3020,7 @@ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 i40iw_cleanup_retrans_entry(cm_node); if (!loopback) { - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { cm_node->state = I40IW_CM_STATE_CLOSED; i40iw_rem_ref_cm_node(cm_node); @@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp) kfree(work); return; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); work->iwqp = iwqp; @@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work) kfree(dwork); i40iw_cm_disconn_true(iwqp); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -3678,7 +3678,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; } - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { i40iw_rem_ref_cm_node(cm_node); return -ECONNRESET; @@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->lsmm_size = accept.size + conn_param->private_data_len; i40iw_cm_init_tsa_conn(iwqp, cm_node); cm_id->add_ref(cm_id); - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; @@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { cm_node->state = I40IW_CM_STATE_SYN_SENT; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 86d3f8aff329..c943d491b72b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -820,46 +820,6 @@ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( } /** - * i40iw_sc_manage_push_page - Handle push page - * @cqp: struct for cqp hw - * @info: push page info - * @scratch: u64 saved to be used during cqp completion - * @post_sq: 
flag for cqp db to ring - */ -static enum i40iw_status_code i40iw_sc_manage_push_page( - struct i40iw_sc_cqp *cqp, - struct i40iw_cqp_manage_push_page_info *info, - u64 scratch, - bool post_sq) -{ - u64 *wqe; - u64 header; - - if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT) - return I40IW_ERR_INVALID_PUSH_PAGE_INDEX; - - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); - if (!wqe) - return I40IW_ERR_RING_FULL; - - set_64bit_val(wqe, 16, info->qs_handle); - - header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) | - LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) | - LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) | - LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE); - - i40iw_insert_wqe_hdr(wqe, header); - - i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE", - wqe, I40IW_CQP_WQE_SIZE * 8); - - if (post_sq) - i40iw_sc_cqp_post_sq(cqp); - return 0; -} - -/** * i40iw_sc_manage_hmc_pm_func_table - manage of function table * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion @@ -2859,9 +2819,7 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) | LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) | - LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) | - LS_64(info->push_idx, I40IWQPC_PPIDX) | - LS_64(info->push_mode_en, I40IWQPC_PMENA); + LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN); set_64bit_val(qp_ctx, 8, qp->sq_pa); set_64bit_val(qp_ctx, 16, qp->rq_pa); @@ -4291,13 +4249,6 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev, pcmdinfo->in.u.add_arp_cache_entry.scratch, pcmdinfo->post_sq); break; - case OP_MANAGE_PUSH_PAGE: - status = i40iw_sc_manage_push_page( - pcmdinfo->in.u.manage_push_page.cqp, - &pcmdinfo->in.u.manage_push_page.info, - pcmdinfo->in.u.manage_push_page.scratch, - pcmdinfo->post_sq); - break; case OP_UPDATE_PE_SDS: /* case I40IW_CQP_OP_UPDATE_PE_SDS */ status = i40iw_update_pe_sds( @@ -5098,7 +5049,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) i40iw_hw_stats_stop_timer(vsi); } -static struct i40iw_cqp_ops iw_cqp_ops = { +static const struct i40iw_cqp_ops iw_cqp_ops = { .cqp_init = i40iw_sc_cqp_init, .cqp_create = i40iw_sc_cqp_create, .cqp_post_sq = i40iw_sc_cqp_post_sq, @@ -5107,7 +5058,7 @@ static struct i40iw_cqp_ops iw_cqp_ops = { .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done }; -static struct i40iw_ccq_ops iw_ccq_ops = { +static const struct i40iw_ccq_ops iw_ccq_ops = { .ccq_init = i40iw_sc_ccq_init, .ccq_create = i40iw_sc_ccq_create, .ccq_destroy = i40iw_sc_ccq_destroy, @@ -5116,7 +5067,7 @@ static struct i40iw_ccq_ops iw_ccq_ops = { .ccq_arm = i40iw_sc_ccq_arm }; -static struct i40iw_ceq_ops iw_ceq_ops = { +static const struct i40iw_ceq_ops iw_ceq_ops = { .ceq_init = i40iw_sc_ceq_init, .ceq_create = i40iw_sc_ceq_create, .cceq_create_done = i40iw_sc_cceq_create_done, @@ -5126,7 +5077,7 @@ static struct i40iw_ceq_ops iw_ceq_ops = { .process_ceq = i40iw_sc_process_ceq }; -static struct i40iw_aeq_ops iw_aeq_ops = { +static const struct i40iw_aeq_ops iw_aeq_ops = { .aeq_init = i40iw_sc_aeq_init, .aeq_create = i40iw_sc_aeq_create, .aeq_destroy = i40iw_sc_aeq_destroy, @@ -5137,11 +5088,11 @@ static struct i40iw_aeq_ops iw_aeq_ops = { }; /* iwarp pd ops */ -static struct i40iw_pd_ops iw_pd_ops = { +static const struct i40iw_pd_ops iw_pd_ops = { .pd_init = i40iw_sc_pd_init, }; -static struct i40iw_priv_qp_ops iw_priv_qp_ops = { +static const struct i40iw_priv_qp_ops iw_priv_qp_ops = { .qp_init = i40iw_sc_qp_init, 
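
The i40iw dispatch tables above (iw_cqp_ops through iw_priv_qp_ops, and the rest that follow) are declared const, with the matching pointer fields in i40iw_sc_dev const-qualified in the i40iw_type.h hunk further down. A hedged sketch of the pattern with a made-up ops structure:

struct widget;

struct widget_ops {			/* hypothetical ops table */
	int  (*start)(struct widget *w);
	void (*stop)(struct widget *w);
};

static int  widget_start(struct widget *w) { return 0; }
static void widget_stop(struct widget *w) { }

/* const: the table is placed in .rodata and cannot be patched at runtime. */
static const struct widget_ops widget_default_ops = {
	.start = widget_start,
	.stop  = widget_stop,
};

struct widget {
	const struct widget_ops *ops;	/* consumers must take const too */
};

Keeping function-pointer tables read-only is a common hardening and correctness measure; the only follow-on cost is const-qualifying every pointer that stores them, which is exactly what the i40iw_sc_dev changes below do.
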
.qp_create = i40iw_sc_qp_create, .qp_modify = i40iw_sc_qp_modify, @@ -5156,14 +5107,14 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = { .iw_mr_fast_register = i40iw_sc_mr_fast_register }; -static struct i40iw_priv_cq_ops iw_priv_cq_ops = { +static const struct i40iw_priv_cq_ops iw_priv_cq_ops = { .cq_init = i40iw_sc_cq_init, .cq_create = i40iw_sc_cq_create, .cq_destroy = i40iw_sc_cq_destroy, .cq_modify = i40iw_sc_cq_modify, }; -static struct i40iw_mr_ops iw_mr_ops = { +static const struct i40iw_mr_ops iw_mr_ops = { .alloc_stag = i40iw_sc_alloc_stag, .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared, .mr_reg_shared = i40iw_sc_mr_reg_shared, @@ -5172,8 +5123,7 @@ static struct i40iw_mr_ops iw_mr_ops = { .mw_alloc = i40iw_sc_mw_alloc }; -static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { - .manage_push_page = i40iw_sc_manage_push_page, +static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, .commit_fpm_values = i40iw_sc_commit_fpm_values, @@ -5195,7 +5145,7 @@ static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .update_resume_qp = i40iw_sc_resume_qp }; -static struct i40iw_hmc_ops iw_hmc_ops = { +static const struct i40iw_hmc_ops iw_hmc_ops = { .init_iw_hmc = i40iw_sc_init_iw_hmc, .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf, .configure_iw_fpm = i40iw_sc_configure_iw_fpm, diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index e8367d67575d..86d5a33c57cc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -40,11 +40,6 @@ #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) -#define I40IW_PUSH_OFFSET (4 * 1024 * 1024) -#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16 -#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024) -#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2 - #define I40IW_PE_DB_SIZE_4M 1 #define I40IW_PE_DB_SIZE_8M 2 @@ -402,7 +397,6 @@ #define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e #define I40IW_CQP_OP_MANAGE_ARP 0x0f #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10 -#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11 #define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12 #define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14 @@ -843,7 +837,6 @@ #define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \ (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT) -/* Manage Push Page - MPP */ #define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff #define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0 @@ -1352,9 +1345,6 @@ #define I40IWQPSQ_ADDFRAGCNT_SHIFT 38 #define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT) -#define I40IWQPSQ_PUSHWQE_SHIFT 56 -#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT) - #define I40IWQPSQ_STREAMMODE_SHIFT 58 #define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT) @@ -1740,18 +1730,17 @@ enum i40iw_alignment { #define OP_MW_ALLOC 20 #define OP_QP_FLUSH_WQES 21 #define OP_ADD_ARP_CACHE_ENTRY 22 -#define OP_MANAGE_PUSH_PAGE 23 -#define OP_UPDATE_PE_SDS 24 -#define OP_MANAGE_HMC_PM_FUNC_TABLE 25 -#define OP_SUSPEND 26 -#define OP_RESUME 27 -#define OP_MANAGE_VF_PBLE_BP 28 -#define OP_QUERY_FPM_VALUES 29 -#define OP_COMMIT_FPM_VALUES 30 -#define OP_REQUESTED_COMMANDS 31 -#define OP_COMPLETED_COMMANDS 32 -#define OP_GEN_AE 33 -#define OP_QUERY_RDMA_FEATURES 34 -#define OP_SIZE_CQP_STAT_ARRAY 35 +#define OP_UPDATE_PE_SDS 23 +#define OP_MANAGE_HMC_PM_FUNC_TABLE 24 +#define 
OP_SUSPEND 25 +#define OP_RESUME 26 +#define OP_MANAGE_VF_PBLE_BP 27 +#define OP_QUERY_FPM_VALUES 28 +#define OP_COMMIT_FPM_VALUES 29 +#define OP_REQUESTED_COMMANDS 30 +#define OP_COMPLETED_COMMANDS 31 +#define OP_GEN_AE 32 +#define OP_QUERY_RDMA_FEATURES 33 +#define OP_SIZE_CQP_STAT_ARRAY 34 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index e1085634b8d9..56fdc161f6f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) __func__, info->qp_cq_id); continue; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); @@ -426,7 +426,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; } if (info->qp) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 58a433135a03..584932d3cc44 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -54,10 +54,6 @@ #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD) -static int push_mode; -module_param(push_mode, int, 0644); -MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)"); - static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all"); @@ -192,9 +188,9 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) * i40iw_dpc - tasklet for aeq and ceq 0 * @data: iwarp device */ -static void i40iw_dpc(unsigned long data) +static void i40iw_dpc(struct tasklet_struct *t) { - struct i40iw_device *iwdev = (struct i40iw_device *)data; + struct i40iw_device *iwdev = from_tasklet(iwdev, t, dpc_tasklet); if (iwdev->msix_shared) i40iw_process_ceq(iwdev, iwdev->ceqlist); @@ -206,9 +202,9 @@ static void i40iw_dpc(unsigned long data) * i40iw_ceq_dpc - dpc handler for CEQ * @data: data points to CEQ */ -static void i40iw_ceq_dpc(unsigned long data) +static void i40iw_ceq_dpc(struct tasklet_struct *t) { - struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data; + struct i40iw_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet); struct i40iw_device *iwdev = iwceq->iwdev; i40iw_process_ceq(iwdev, iwceq); @@ -689,10 +685,10 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw enum i40iw_status_code status; if (iwdev->msix_shared && !ceq_id) { - tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); + tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc); status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev); } else { - tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq); + tasklet_setup(&iwceq->dpc_tasklet, i40iw_ceq_dpc); status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } @@ -841,7 +837,7 @@ static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iw u32 ret = 0; if (!iwdev->msix_shared) { - tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); + tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc); ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev); } if (ret) { @@ -1573,14 +1569,13 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, status = 
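
The i40iw bottom halves above move from tasklet_init() with an unsigned long cookie to tasklet_setup(), where the callback recovers its owning object with from_tasklet(), a container_of() wrapper. A minimal sketch of the new-style pattern, using a hypothetical device structure:

#include <linux/interrupt.h>

struct mydev {				/* hypothetical owner of the tasklet */
	struct tasklet_struct dpc_tasklet;
	/* ... */
};

/* The callback now receives the tasklet itself instead of an opaque cookie. */
static void mydev_dpc(struct tasklet_struct *t)
{
	struct mydev *dev = from_tasklet(dev, t, dpc_tasklet);

	/* process completions for 'dev' ... */
}

static void mydev_init_bh(struct mydev *dev)
{
	tasklet_setup(&dev->dpc_tasklet, mydev_dpc);
}

Dropping the (unsigned long) casts removes a class of type-confusion bugs, which is why both the shared AEQ/CEQ tasklet and the per-CEQ tasklet in the driver are converted the same way.
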
i40iw_save_msix_info(iwdev, ldev); if (status) return status; - iwdev->hw.dev_context = (void *)ldev->pcidev; + iwdev->hw.pcidev = ldev->pcidev; iwdev->hw.hw_addr = ldev->hw_addr; status = i40iw_allocate_dma_mem(&iwdev->hw, &iwdev->obj_mem, 8192, 4096); if (status) goto exit; iwdev->obj_next = iwdev->obj_mem; - iwdev->push_mode = push_mode; init_waitqueue_head(&iwdev->vchnl_waitq); init_waitqueue_head(&dev->vf_reqs); diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 540aab5e502d..5f97643e22e5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -167,7 +167,7 @@ static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev, */ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; int i; if (!chunk->pg_cnt) @@ -193,7 +193,7 @@ static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk, int pg_cnt) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; struct page *page; u8 *addr; u32 size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index d1c5855bd8c3..36a19c4e5bba 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -61,7 +61,6 @@ enum i40iw_status_code { I40IW_ERR_QUEUE_EMPTY = -22, I40IW_ERR_INVALID_ALIGNMENT = -23, I40IW_ERR_FLUSHED_QUEUE = -24, - I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, I40IW_ERR_TIMEOUT = -27, I40IW_ERR_OPCODE_MISMATCH = -28, diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 54c323c40d96..394e182686cf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -73,6 +73,7 @@ struct i40iw_pd_ops; struct i40iw_priv_qp_ops; struct i40iw_priv_cq_ops; struct i40iw_hmc_ops; +struct pci_dev; enum i40iw_page_size { I40IW_PAGE_SIZE_4K, @@ -261,7 +262,7 @@ struct i40iw_vsi_pestat { struct i40iw_hw { u8 __iomem *hw_addr; - void *dev_context; + struct pci_dev *pcidev; struct i40iw_hmc_info hmc; }; @@ -386,7 +387,6 @@ struct i40iw_sc_qp { u8 *q2_buf; u64 qp_compl_ctx; u16 qs_handle; - u16 push_idx; u8 sq_tph_val; u8 rq_tph_val; u8 qp_state; @@ -492,16 +492,16 @@ struct i40iw_sc_dev { struct i40iw_sc_aeq *aeq; struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT]; struct i40iw_sc_cq *ccq; - struct i40iw_cqp_ops *cqp_ops; - struct i40iw_ccq_ops *ccq_ops; - struct i40iw_ceq_ops *ceq_ops; - struct i40iw_aeq_ops *aeq_ops; - struct i40iw_pd_ops *iw_pd_ops; - struct i40iw_priv_qp_ops *iw_priv_qp_ops; - struct i40iw_priv_cq_ops *iw_priv_cq_ops; - struct i40iw_mr_ops *mr_ops; - struct i40iw_cqp_misc_ops *cqp_misc_ops; - struct i40iw_hmc_ops *hmc_ops; + const struct i40iw_cqp_ops *cqp_ops; + const struct i40iw_ccq_ops *ccq_ops; + const struct i40iw_ceq_ops *ceq_ops; + const struct i40iw_aeq_ops *aeq_ops; + const struct i40iw_pd_ops *iw_pd_ops; + const struct i40iw_priv_qp_ops *iw_priv_qp_ops; + const struct i40iw_priv_cq_ops *iw_priv_cq_ops; + const struct i40iw_mr_ops *mr_ops; + const struct i40iw_cqp_misc_ops *cqp_misc_ops; + const struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; @@ -748,8 +748,6 @@ struct i40iw_qp_host_ctx_info { struct i40iwarp_offload_info 
*iwarp_info; u32 send_cq_num; u32 rcv_cq_num; - u16 push_idx; - bool push_mode_en; bool tcp_info_valid; bool iwarp_info_valid; bool err_rq_idx_valid; @@ -936,12 +934,6 @@ struct i40iw_local_mac_ipaddr_entry_info { u8 entry_idx; }; -struct i40iw_cqp_manage_push_page_info { - u32 push_idx; - u16 qs_handle; - u8 free_page; -}; - struct i40iw_qp_flush_info { u16 sq_minor_code; u16 sq_major_code; @@ -1113,9 +1105,6 @@ struct i40iw_mr_ops { }; struct i40iw_cqp_misc_ops { - enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *, - struct i40iw_cqp_manage_push_page_info *, - u64, bool); enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *, u64, u8, bool, bool); enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *, @@ -1253,12 +1242,6 @@ struct cqp_info { } manage_vf_pble_bp; struct { - struct i40iw_sc_cqp *cqp; - struct i40iw_cqp_manage_push_page_info info; - u64 scratch; - } manage_push_page; - - struct { struct i40iw_sc_dev *dev; struct i40iw_upload_context_info info; u64 scratch; diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 8afa5a67a86b..c3633c9944db 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -115,17 +115,6 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) } /** - * i40iw_qp_ring_push_db - ring qp doorbell - * @qp: hw qp ptr - * @wqe_idx: wqe index - */ -static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx) -{ - set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id); - qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); -} - -/** * i40iw_qp_get_next_send_wqe - return next wqe ptr * @qp: hw qp ptr * @wqe_idx: return wqe index @@ -426,7 +415,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, u64 *wqe; u8 *dest, *src; struct i40iw_inline_rdma_write *op_info; - u64 *push; u64 header = 0; u32 wqe_idx; enum i40iw_status_code ret_code; @@ -453,7 +441,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -475,14 +462,8 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -507,7 +488,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; - u64 *push; op_info = &info->op.inline_send; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) @@ -526,7 +506,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, LS_64(info->op_type, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 
1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -548,14 +527,8 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -772,7 +745,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, q_type = (u8)RS_64(qword3, I40IW_CQ_SQ); info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR); - info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP); if (info->error) { info->comp_status = I40IW_COMPL_STATUS_FLUSHED; info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR); @@ -951,7 +923,6 @@ enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth) static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_qp_post_wr = i40iw_qp_post_wr, - .iw_qp_ring_push_db = i40iw_qp_ring_push_db, .iw_rdma_write = i40iw_rdma_write, .iw_rdma_read = i40iw_rdma_read, .iw_send = i40iw_send, @@ -1009,11 +980,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, qp->wqe_alloc_reg = info->wqe_alloc_reg; qp->qp_id = info->qp_id; - qp->sq_size = info->sq_size; - qp->push_db = info->push_db; - qp->push_wqe = info->push_wqe; - qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << sqshift; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index b125925641e0..93fc3081dd65 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -64,13 +64,11 @@ enum i40iw_device_capabilities_const { I40IW_MAX_SGE_RD = 1, I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, - I40IW_MAX_PUSH_PAGE_COUNT = 4096, I40IW_MAX_PE_ENABLED_VF_COUNT = 32, I40IW_MAX_VF_FPM_ID = 47, I40IW_MAX_VF_PER_PF = 127, I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, I40IW_MAX_INLINE_DATA_SIZE = 48, - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, I40IW_MAX_IRD_SIZE = 64, I40IW_MAX_ORD_SIZE = 127, I40IW_MAX_WQ_ENTRIES = 2048, @@ -272,7 +270,6 @@ struct i40iw_cq_poll_info { u16 minor_err; u8 op_type; bool stag_invalid_set; - bool push_dropped; bool error; bool is_srq; bool solicited_event; @@ -280,7 +277,6 @@ struct i40iw_cq_poll_info { struct i40iw_qp_uk_ops { void (*iw_qp_post_wr)(struct i40iw_qp_uk *); - void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32); enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *, struct i40iw_post_sq_info *, bool); enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *, @@ -340,8 +336,6 @@ struct i40iw_qp_uk { struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; u64 *shadow_area; - u32 *push_db; - u64 *push_wqe; struct i40iw_ring sq_ring; struct i40iw_ring rq_ring; struct i40iw_ring initial_ring; @@ -381,8 +375,6 @@ struct i40iw_qp_uk_init_info { u64 *shadow_area; struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; - u32 *push_db; - u64 *push_wqe; u32 qp_id; u32 sq_size; u32 rq_size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e07fb37af086..644f8c641aa0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -478,25 +478,6 @@ 
void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev) } /** - * i40iw_free_qp - callback after destroy cqp completes - * @cqp_request: cqp request for destroy qp - * @num: not used - */ -static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num) -{ - struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param; - struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; - struct i40iw_device *iwdev; - u32 qp_num = iwqp->ibqp.qp_num; - - iwdev = iwqp->iwdev; - - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); -} - -/** * i40iw_wait_event - wait for completion * @iwdev: iwarp device * @cqp_request: cqp request to wait @@ -616,26 +597,23 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev) } /** - * i40iw_add_ref - add refcount for qp + * i40iw_qp_add_ref - add refcount for qp * @ibqp: iqarp qp */ -void i40iw_add_ref(struct ib_qp *ibqp) +void i40iw_qp_add_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp; - atomic_inc(&iwqp->refcount); + refcount_inc(&iwqp->refcount); } /** - * i40iw_rem_ref - rem refcount for qp and free if 0 + * i40iw_qp_rem_ref - rem refcount for qp and free if 0 * @ibqp: iqarp qp */ -void i40iw_rem_ref(struct ib_qp *ibqp) +void i40iw_qp_rem_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp; - enum i40iw_status_code status; - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; struct i40iw_device *iwdev; u32 qp_num; unsigned long flags; @@ -643,7 +621,7 @@ void i40iw_rem_ref(struct ib_qp *ibqp) iwqp = to_iwqp(ibqp); iwdev = iwqp->iwdev; spin_lock_irqsave(&iwdev->qptable_lock, flags); - if (!atomic_dec_and_test(&iwqp->refcount)) { + if (!refcount_dec_and_test(&iwqp->refcount)) { spin_unlock_irqrestore(&iwdev->qptable_lock, flags); return; } @@ -651,25 +629,8 @@ void i40iw_rem_ref(struct ib_qp *ibqp) qp_num = iwqp->ibqp.qp_num; iwdev->qp_table[qp_num] = NULL; spin_unlock_irqrestore(&iwdev->qptable_lock, flags); - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) - return; - - cqp_request->callback_fcn = i40iw_free_qp; - cqp_request->param = (void *)&iwqp->sc_qp; - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_QP_DESTROY; - cqp_info->post_sq = 1; - cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp; - cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; - cqp_info->in.u.qp_destroy.remove_hash_idx = true; - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - return; + complete(&iwqp->free_qp); - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); } /** @@ -751,7 +712,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw, u64 size, u32 alignment) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; if (!mem) return I40IW_ERR_PARAM; @@ -770,7 +731,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw, */ void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; if (!mem || !mem->va) return; @@ -936,7 +897,7 @@ static void i40iw_terminate_timeout(struct timer_list *t) struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -948,7 +909,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp 
*qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0); iwqp->terminate_timer.expires = jiffies + HZ; add_timer(&iwqp->terminate_timer); @@ -964,7 +925,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp) iwqp = (struct i40iw_qp *)qp->back_qp; if (del_timer(&iwqp->terminate_timer)) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index b51339328a51..65aedfe57e77 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -167,111 +167,16 @@ static void i40iw_dealloc_ucontext(struct ib_ucontext *context) */ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { - struct i40iw_ucontext *ucontext; - u64 db_addr_offset, push_offset, pfn; - - ucontext = to_ucontext(context); - if (ucontext->iwdev->sc_dev.is_pf) { - db_addr_offset = I40IW_DB_ADDR_OFFSET; - push_offset = I40IW_PUSH_OFFSET; - if (vma->vm_pgoff) - vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1; - } else { - db_addr_offset = I40IW_VF_DB_ADDR_OFFSET; - push_offset = I40IW_VF_PUSH_OFFSET; - if (vma->vm_pgoff) - vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1; - } - - vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT; - - if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - } else { - if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - } - - pfn = vma->vm_pgoff + - (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> - PAGE_SHIFT); - - return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, - vma->vm_page_prot, NULL); -} - -/** - * i40iw_alloc_push_page - allocate a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; + struct i40iw_ucontext *ucontext = to_ucontext(context); + u64 dbaddr; - if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); - if (!cqp_request) - return; - - atomic_inc(&cqp_request->refcount); - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 0; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = cqp_request->compl_info.op_ret_val; - else - i40iw_pr_err("CQP-OP Push page fail"); - i40iw_put_cqp_request(&iwdev->cqp, cqp_request); -} - -/** - * i40iw_dealloc_push_page - free a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); 
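Note: the i40iw_qp_add_ref()/i40iw_qp_rem_ref() hunks above move the QP reference count from atomic_t to refcount_t and let the final put signal a completion instead of queueing an asynchronous CQP destroy. A minimal sketch of that lifetime pattern, using made-up demo_* names rather than the driver's own structures:

#include <linux/refcount.h>
#include <linux/completion.h>
#include <linux/slab.h>

struct demo_qp {
	refcount_t refcount;
	struct completion free_qp;
};

/* Create path: one initial reference, completion not yet signalled. */
static void demo_qp_init(struct demo_qp *qp)
{
	refcount_set(&qp->refcount, 1);
	init_completion(&qp->free_qp);
}

/* Taken by anyone (e.g. a timer) that needs the QP to stay around. */
static void demo_qp_add_ref(struct demo_qp *qp)
{
	refcount_inc(&qp->refcount);
}

/* The last put does not free; it wakes whoever is tearing the QP down. */
static void demo_qp_rem_ref(struct demo_qp *qp)
{
	if (refcount_dec_and_test(&qp->refcount))
		complete(&qp->free_qp);
}

/* Destroy path: drop the initial reference, wait for all users, then free. */
static void demo_qp_destroy(struct demo_qp *qp)
{
	demo_qp_rem_ref(qp);
	wait_for_completion(&qp->free_qp);
	kfree(qp);
}

This keeps teardown synchronous in the destroy verb while still letting timers and work items hold transient references.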
- if (!cqp_request) - return; - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; + if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; - cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 1; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; + dbaddr = I40IW_DB_ADDR_OFFSET + pci_resource_start(ucontext->iwdev->ldev->pcidev, 0); - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - else - i40iw_pr_err("CQP-OP Push page fail"); + return rdma_user_mmap_io(context, vma, dbaddr >> PAGE_SHIFT, PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), NULL); } /** @@ -328,12 +233,13 @@ error: * @ibpd: ptr of pd to be deallocated * @udata: user data or null for kernel object */ -static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); i40iw_rem_pdusecount(iwpd, iwdev); + return 0; } /** @@ -363,14 +269,13 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, * @iwqp: qp ptr (user or kernel) * @qp_num: qp number assigned */ -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num) +void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { struct i40iw_pbl *iwpbl = &iwqp->iwpbl; + struct i40iw_device *iwdev = iwqp->iwdev; + u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); - i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); if (qp_num) i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); if (iwpbl->pbl_allocated) @@ -379,7 +284,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev, i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.wrid_mem); iwqp->kqp.wrid_mem = NULL; - kfree(iwqp->allocated_buffer); + kfree(iwqp); } /** @@ -401,6 +306,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct i40iw_qp *iwqp = to_iwqp(ibqp); + struct ib_qp_attr attr; + struct i40iw_device *iwdev = iwqp->iwdev; + + memset(&attr, 0, sizeof(attr)); iwqp->destroyed = 1; @@ -415,7 +324,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } } - i40iw_rem_ref(&iwqp->ibqp); + attr.qp_state = IB_QPS_ERR; + i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); + i40iw_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp); + i40iw_rem_pdusecount(iwqp->iwpd, iwdev); + i40iw_free_qp_resources(iwqp); + i40iw_rem_devusecount(iwdev); + return 0; } @@ -524,7 +441,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, struct i40iw_create_qp_req req; struct i40iw_create_qp_resp uresp; u32 qp_num = 0; - void *mem; enum i40iw_status_code ret; int err_code; int sq_size; @@ -544,7 +460,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENODEV); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; @@ -566,16 +482,13 @@ static struct 
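Note: with push pages gone, the rewritten i40iw_mmap() above only exposes the doorbell page and rejects every other offset. A reduced sketch of that shape, with hypothetical demo_* names and a made-up DEMO_DB_OFFSET for the doorbell's position in BAR 0:

#include <linux/mm.h>
#include <linux/pci.h>
#include <rdma/ib_verbs.h>

/* DEMO_DB_OFFSET is a made-up offset of the doorbell page within BAR 0. */
#define DEMO_DB_OFFSET 0x0

static int demo_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma,
		     struct pci_dev *pdev)
{
	u64 db = pci_resource_start(pdev, 0) + DEMO_DB_OFFSET;

	/* Only offset 0 and exactly one page are accepted. */
	if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	/* Insert the doorbell page uncached into the caller's address space. */
	return rdma_user_mmap_io(uctx, vma, db >> PAGE_SHIFT, PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot), NULL);
}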
ib_qp *i40iw_create_qp(struct ib_pd *ibpd, init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; - mem = kzalloc(sizeof(*iwqp), GFP_KERNEL); - if (!mem) + iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL); + if (!iwqp) return ERR_PTR(-ENOMEM); - iwqp = (struct i40iw_qp *)mem; - iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - + iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; if (i40iw_allocate_dma_mem(dev->hw, @@ -600,7 +513,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; @@ -619,8 +531,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, err_code = -EOPNOTSUPP; goto error; } - if (iwdev->push_mode) - i40iw_alloc_push_page(iwdev, qp); if (udata) { err_code = ib_copy_from_udata(&req, udata, sizeof(req)); if (err_code) { @@ -679,13 +589,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, ctx_info->iwarp_info_valid = true; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) { - ctx_info->push_mode_en = false; - } else { - ctx_info->push_mode_en = true; - ctx_info->push_idx = qp->push_idx; - } - ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)iwqp->host_ctx.va, ctx_info); @@ -714,7 +617,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - i40iw_add_ref(&iwqp->ibqp); + refcount_set(&iwqp->refcount, 1); spin_lock_init(&iwqp->lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; iwdev->qp_table[qp_num] = iwqp; @@ -725,7 +628,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; uresp.qp_id = qp_num; - uresp.push_idx = qp->push_idx; + uresp.push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err_code) { i40iw_pr_err("copy_to_udata failed\n"); @@ -736,10 +639,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, } init_completion(&iwqp->sq_drained); init_completion(&iwqp->rq_drained); + init_completion(&iwqp->free_qp); return &iwqp->ibqp; error: - i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_free_qp_resources(iwqp); return ERR_PTR(err_code); } @@ -844,6 +748,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, u32 err; unsigned long flags; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&info, 0, sizeof(info)); ctx_info = &iwqp->ctx_info; iwarp_info = &iwqp->iwarp_info; @@ -1052,7 +959,7 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) * @ib_cq: cq pointer * @udata: user data or NULL for kernel object */ -static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct i40iw_cq *iwcq; struct i40iw_device *iwdev; @@ -1064,6 +971,7 @@ static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources(iwdev, iwcq); i40iw_rem_devusecount(iwdev); + return 0; } /** @@ -1092,6 +1000,9 @@ static int i40iw_create_cq(struct ib_cq *ibcq, int err_code; int entries = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (iwdev->closing) return -ENODEV; @@ -1320,8 
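Note: several hunks above (create_qp, modify_qp, create_cq) add the same guard: attribute bits or flags the driver does not implement are refused with -EOPNOTSUPP up front instead of being silently ignored. A sketch of the pattern with a hypothetical demo_modify_qp():

#include <rdma/ib_verbs.h>

static int demo_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			  int attr_mask, struct ib_udata *udata)
{
	/* IB_QP_ATTR_STANDARD_BITS is the core-defined attribute mask. */
	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	/* ... ordinary state-transition handling continues here ... */
	return 0;
}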
+1231,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, if (iwmr->type == IW_MEMREG_TYPE_QP) iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); - rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap, - iwmr->page_size) { + rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) { *pbl = rdma_block_iter_dma_address(&biter); pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); } @@ -1744,15 +1654,12 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, struct i40iw_mr *iwmr; struct ib_umem *region; struct i40iw_mem_reg_req req; - u64 pbl_depth = 0; u32 stag = 0; u16 access; - u64 region_length; bool use_pbles = false; unsigned long flags; int err = -ENOSYS; int ret; - int pg_shift; if (!udata) return ERR_PTR(-EOPNOTSUPP); @@ -1787,18 +1694,13 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (req.reg_type == IW_MEMREG_TYPE_MEM) iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M, virt); - - region_length = region->length + (start & (iwmr->page_size - 1)); - pg_shift = ffs(iwmr->page_size) - 1; - pbl_depth = region_length >> pg_shift; - pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0; iwmr->length = region->length; iwpbl->user_base = virt; palloc = &iwpbl->pble_alloc; iwmr->type = req.reg_type; - iwmr->page_cnt = (u32)pbl_depth; + iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size); switch (req.reg_type) { case IW_MEMREG_TYPE_QP: @@ -2053,7 +1955,7 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; - return sprintf(buf, "%x\n", hw_rev); + return sysfs_emit(buf, "%x\n", hw_rev); } static DEVICE_ATTR_RO(hw_rev); @@ -2063,7 +1965,7 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "I40IW\n"); + return sysfs_emit(buf, "I40IW\n"); } static DEVICE_ATTR_RO(hca_type); @@ -2073,7 +1975,7 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); + return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID"); } static DEVICE_ATTR_RO(board_id); @@ -2636,13 +2538,13 @@ static const struct ib_device_ops i40iw_dev_ops = { .get_hw_stats = i40iw_get_hw_stats, .get_port_immutable = i40iw_port_immutable, .iw_accept = i40iw_accept, - .iw_add_ref = i40iw_add_ref, + .iw_add_ref = i40iw_qp_add_ref, .iw_connect = i40iw_connect, .iw_create_listen = i40iw_create_listen, .iw_destroy_listen = i40iw_destroy_listen, .iw_get_qp = i40iw_get_qp, .iw_reject = i40iw_reject, - .iw_rem_ref = i40iw_rem_ref, + .iw_rem_ref = i40iw_qp_rem_ref, .map_mr_sg = i40iw_map_mr_sg, .mmap = i40iw_mmap, .modify_qp = i40iw_modify_qp, @@ -2668,7 +2570,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev { struct i40iw_ib_device *iwibdev; struct net_device *netdev = iwdev->netdev; - struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context; + struct pci_dev *pcidev = iwdev->hw.pcidev; iwibdev = ib_alloc_device(i40iw_ib_device, ibdev); if (!iwibdev) { @@ -2681,27 +2583,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.node_type = RDMA_NODE_RNIC; ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); - iwibdev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << 
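Note: the i40iw_copy_user_pgaddrs()/i40iw_reg_user_mr() hunks above drop the open-coded page arithmetic in favour of the umem DMA-block helpers. A small sketch of filling a page list that way, with a hypothetical demo_fill_pbl():

#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/* Walk a user MR in device-page-sized blocks and record each DMA address. */
static int demo_fill_pbl(struct ib_umem *umem, unsigned long pg_size,
			 u64 *pbl, u32 pbl_len)
{
	struct ib_block_iter biter;
	u32 i = 0;

	if (ib_umem_num_dma_blocks(umem, pg_size) > pbl_len)
		return -EINVAL;

	rdma_umem_for_each_dma_block(umem, &biter, pg_size)
		pbl[i++] = rdma_block_iter_dma_address(&biter);

	return 0;
}

ib_umem_num_dma_blocks() gives the same count the iterator will produce, so the caller can size the PBL before walking it.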
IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; @@ -2758,7 +2639,8 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) if (ret) goto error; - ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); + dma_set_max_seg_size(&iwdev->hw.pcidev->dev, UINT_MAX); + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", &iwdev->hw.pcidev->dev); if (ret) goto error; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 331bc21cbcc7..bab71f3e5637 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -139,7 +139,7 @@ struct i40iw_qp { struct i40iw_qp_host_ctx_info ctx_info; struct i40iwarp_offload_info iwarp_info; void *allocated_buffer; - atomic_t refcount; + refcount_t refcount; struct iw_cm_id *cm_id; void *cm_node; struct ib_mr *lsmm_mr; @@ -174,5 +174,6 @@ struct i40iw_qp { struct i40iw_dma_mem ietf_mem; struct completion sq_drained; struct completion rq_drained; + struct completion free_qp; }; #endif diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5f8f8d5c0ce0..7321d6ab5fe1 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -232,8 +232,3 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index b591861934b3..4aff1c8298b1 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -54,11 +54,20 @@ struct id_map_entry { struct delayed_work timeout; }; +struct rej_tmout_entry { + int slave; + u32 rem_pv_cm_id; + struct delayed_work timeout; + struct xarray *xa_rej_tmout; +}; + struct cm_generic_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; + unsigned char unused[2]; + __be16 rej_reason; }; struct cm_sidr_generic_msg { @@ -280,11 +289,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + } else if (id->scheduled_delete) { + /* Adjust timeout if already scheduled */ + mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); spin_unlock(&sriov->id_map_lock); } +#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason) int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, struct ib_mad *mad) { @@ -293,8 +306,10 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, 
int slave_id int pv_cm_id = -1; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID || - mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_MRA_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID || + (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) { sl_cm_id = get_local_comm_id(mad); id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); if (id) @@ -314,8 +329,8 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id } if (!id) { - pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n", - slave_id, sl_cm_id); + pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n", + slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id)); return -EINVAL; } @@ -327,11 +342,94 @@ cont: return 0; } +static void rej_tmout_timeout(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout); + struct rej_tmout_entry *deleted; + + deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0); + + if (deleted != item) + pr_debug("deleted(%p) != item(%p)\n", deleted, item); + + kfree(item); +} + +static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave) +{ + struct rej_tmout_entry *item; + struct rej_tmout_entry *old; + int ret = 0; + + xa_lock(&sriov->xa_rej_tmout); + item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id); + + if (item) { + if (xa_err(item)) + ret = xa_err(item); + else + /* If a retry, adjust delayed work */ + mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + goto err_or_exists; + } + xa_unlock(&sriov->xa_rej_tmout); + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout); + item->slave = slave; + item->rem_pv_cm_id = rem_pv_cm_id; + item->xa_rej_tmout = &sriov->xa_rej_tmout; + + old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL); + if (old) { + pr_debug( + "Non-null old entry (%p) or error (%d) when inserting\n", + old, xa_err(old)); + kfree(item); + return xa_err(old); + } + + schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + + return 0; + +err_or_exists: + xa_unlock(&sriov->xa_rej_tmout); + return ret; +} + +static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id) +{ + struct rej_tmout_entry *item; + int slave; + + xa_lock(&sriov->xa_rej_tmout); + item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id); + + if (!item || xa_err(item)) { + pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n", + rem_pv_cm_id, xa_err(item)); + slave = !item ? 
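Note: the alloc_rej_tmout()/lookup_rej_tmout_slave() additions above keep per-connection entries in an xarray and age them out with delayed work; cleanup later forces every pending entry to expire immediately. A reduced sketch of that idiom, using made-up demo_* names:

#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_tmout {
	unsigned long key;
	struct delayed_work timeout;
	struct xarray *xa;
};

/* Expiry: drop the entry from the xarray (if still present) and free it. */
static void demo_tmout_fn(struct work_struct *work)
{
	struct demo_tmout *e = container_of(to_delayed_work(work),
					    struct demo_tmout, timeout);

	xa_cmpxchg(e->xa, e->key, e, NULL, 0);
	kfree(e);
}

static int demo_tmout_add(struct xarray *xa, unsigned long key,
			  unsigned long delay)
{
	struct demo_tmout *e, *old;

	e = kmalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->key = key;
	e->xa = xa;
	INIT_DELAYED_WORK(&e->timeout, demo_tmout_fn);

	/* Install only if the slot is still empty; otherwise someone else won. */
	old = xa_cmpxchg(xa, key, NULL, e, GFP_KERNEL);
	if (old) {
		kfree(e);
		return xa_err(old) ? xa_err(old) : -EEXIST;
	}

	schedule_delayed_work(&e->timeout, delay);
	return 0;
}

/* Cleanup: make every entry expire now, then wait for the work to run. */
static void demo_tmout_flush(struct xarray *xa)
{
	struct demo_tmout *e;
	unsigned long key;

	xa_lock(xa);
	xa_for_each(xa, key, e)
		mod_delayed_work(system_wq, &e->timeout, 0);
	xa_unlock(xa);

	flush_scheduled_work();
}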
-ENOENT : xa_err(item); + } else { + slave = item->slave; + } + xa_unlock(&sriov->xa_rej_tmout); + + return slave; +} + int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, struct ib_mad *mad) { + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + u32 rem_pv_cm_id = get_local_comm_id(mad); u32 pv_cm_id; struct id_map_entry *id; + int sts; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { @@ -347,6 +445,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, be64_to_cpu(gid.global.interface_id)); return -ENOENT; } + + sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave); + if (sts) + /* Even if this fails, we pass on the REQ to the slave */ + pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n", + rem_pv_cm_id, *slave, sts); + return 0; } @@ -354,7 +459,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1); if (!id) { - pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id); + if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && + REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) { + *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id); + + return (*slave < 0) ? *slave : 0; + } + pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n", + pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id)); return -ENOENT; } @@ -375,6 +487,34 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC); + xa_init(&dev->sriov.xa_rej_tmout); +} + +static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) +{ + struct rej_tmout_entry *item; + bool flush_needed = false; + unsigned long id; + int cnt = 0; + + xa_lock(&sriov->xa_rej_tmout); + xa_for_each(&sriov->xa_rej_tmout, id, item) { + if (slave < 0 || slave == item->slave) { + mod_delayed_work(system_wq, &item->timeout, 0); + flush_needed = true; + ++cnt; + } + } + xa_unlock(&sriov->xa_rej_tmout); + + if (flush_needed) { + flush_scheduled_work(); + pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n", + cnt, slave); + } + + if (slave < 0) + WARN_ON(!xa_empty(&sriov->xa_rej_tmout)); } /* slave = -1 ==> all slaves */ @@ -444,4 +584,6 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) list_del(&map->list); kfree(map); } + + rej_tmout_xa_cleanup(sriov, slave); } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 8a3436994f80..e9b5a4d57fb1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -149,7 +149,6 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, if (IS_ERR(*umem)) return PTR_ERR(*umem); - n = ib_umem_page_count(*umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); @@ -475,7 +474,7 @@ out: return err; } -void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); @@ -495,6 +494,7 @@ void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) mlx4_db_free(dev->dev, &mcq->db); } ib_umem_release(mcq->umem); + return 0; } static void dump_cqe(void *cqe) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index abe68708d6d6..f3ace85552f3 
100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -500,6 +500,13 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid, sgid, dgid); } +static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) +{ + int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; + + return (qpn >= proxy_start && qpn <= proxy_start + 1); +} + int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) @@ -520,8 +527,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, u16 cached_pkey; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; - if (dest_qpt > IB_QPT_GSI) + if (dest_qpt > IB_QPT_GSI) { + pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt); return -EINVAL; + } tun_ctx = dev->sriov.demux[port-1].tun[slave]; @@ -538,12 +547,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (dest_qpt) { u16 pkey_ix; ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); - if (ret) + if (ret) { + pr_debug("unable to get %s cached pkey for index %d, ret %d\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + wc->pkey_index, ret); return -EINVAL; + } ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); - if (ret) + if (ret) { + pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + cached_pkey, ret); return -EINVAL; + } tun_pkey_ix = pkey_ix; } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; @@ -715,7 +732,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", slave, err); return 0; } @@ -794,7 +812,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", slave, err); return 0; } @@ -807,27 +826,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int err; struct ib_port_attr pattr; - if (in_wc && in_wc->qp) { - pr_debug("received MAD: port:%d slid:%d sqpn:%d " - "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n", - port_num, - in_wc->slid, in_wc->src_qp, - in_wc->dlid_path_bits, - in_wc->qp->qp_num, - in_wc->wc_flags, - be64_to_cpu(in_mad->mad_hdr.tid), - in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, - be16_to_cpu(in_mad->mad_hdr.attr_id)); - if (in_wc->wc_flags & IB_WC_GRH) { - pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", - be64_to_cpu(in_grh->sgid.global.subnet_prefix), - be64_to_cpu(in_grh->sgid.global.interface_id)); - pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", - be64_to_cpu(in_grh->dgid.global.subnet_prefix), - be64_to_cpu(in_grh->dgid.global.interface_id)); - } - } - slid = in_wc ? 
ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { @@ -1299,6 +1297,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } +static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wi_wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_demux_pv_qp *tun_qp, int index) @@ -1341,14 +1351,6 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, return ret; } -static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) -{ - int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; - - return (qpn >= proxy_start && qpn <= proxy_start + 1); -} - - int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr, @@ -1401,10 +1403,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= - (MLX4_NUM_TUNNEL_BUFS - 1)) + (MLX4_NUM_WIRE_BUFS - 1)) ret = -EAGAIN; else - wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); + wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1); spin_unlock(&sqp->tx_lock); if (ret) goto out; @@ -1484,6 +1486,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc u16 vlan_id; u8 qos; u8 *dmac; + int sts; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1520,6 +1523,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc return; } else *slave_id = slave; + break; default: /* nothing */; } @@ -1580,13 +1584,17 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc &vlan_id, &qos)) rdma_ah_set_sl(&ah_attr, qos); - mlx4_ib_send_to_wire(dev, slave, ctx->port, - is_proxy_qp0(dev, wc->src_qp, slave) ? - IB_QPT_SMI : IB_QPT_GSI, - be16_to_cpu(tunnel->hdr.pkey_index), - be32_to_cpu(tunnel->hdr.remote_qpn), - be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, wc->smac, vlan_id, &tunnel->mad); + sts = mlx4_ib_send_to_wire(dev, slave, ctx->port, + is_proxy_qp0(dev, wc->src_qp, slave) ? + IB_QPT_SMI : IB_QPT_GSI, + be16_to_cpu(tunnel->hdr.pkey_index), + be32_to_cpu(tunnel->hdr.remote_qpn), + be32_to_cpu(tunnel->hdr.qkey), + &ah_attr, wc->smac, vlan_id, &tunnel->mad); + if (sts) + pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + slave, sts); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1595,19 +1603,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; + const int nmbr_bufs = is_tun ? 
MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return -EINVAL; tun_qp = &ctx->qp[qp_type]; - tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + tun_qp->ring = kcalloc(nmbr_bufs, sizeof(struct mlx4_ib_buf), GFP_KERNEL); if (!tun_qp->ring) return -ENOMEM; - tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + tun_qp->tx_ring = kcalloc(nmbr_bufs, sizeof (struct mlx4_ib_tun_tx_buf), GFP_KERNEL); if (!tun_qp->tx_ring) { @@ -1624,7 +1633,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size = sizeof (struct mlx4_mad_snd_buf); } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); if (!tun_qp->ring[i].addr) goto err; @@ -1638,7 +1647,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, } } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { tun_qp->tx_ring[i].buf.addr = kmalloc(tx_buf_size, GFP_KERNEL); if (!tun_qp->tx_ring[i].buf.addr) @@ -1669,7 +1678,7 @@ tx_err: tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } - i = MLX4_NUM_TUNNEL_BUFS; + i = nmbr_bufs; err: while (i > 0) { --i; @@ -1690,6 +1699,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; + const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return; @@ -1704,13 +1714,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); @@ -1744,9 +1754,6 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "buf:%lld\n", wc.wr_id); break; case IB_WC_SEND: - pr_debug("received tunnel send completion:" - "wrid=0x%llx, status=0x%x\n", - wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah @@ -1793,6 +1800,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; struct ib_qp_attr attr; int qp_attr_mask_INIT; + const int nmbr_bufs = create_tun ? 
MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return -EINVAL; @@ -1803,8 +1811,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, qp_init_attr.init_attr.send_cq = ctx->cq; qp_init_attr.init_attr.recv_cq = ctx->cq; qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; - qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs; + qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs; qp_init_attr.init_attr.cap.max_send_sge = 1; qp_init_attr.init_attr.cap.max_recv_sge = 1; if (create_tun) { @@ -1866,7 +1874,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, goto err_qp; } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); if (ret) { pr_err(" mlx4_ib_post_pv_buf error" @@ -1902,8 +1910,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: kfree(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + (MLX4_NUM_WIRE_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; @@ -1912,13 +1920,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) case IB_WC_RECV: mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload); + (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload); grh = &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh); + (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh); mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad); if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1))) + (MLX4_NUM_WIRE_BUFS - 1))) pr_err("Failed reposting SQP " "buf:%lld\n", wc.wr_id); break; @@ -1931,8 +1939,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { kfree(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + (MLX4_NUM_WIRE_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; @@ -1972,6 +1980,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, { int ret, cq_size; struct ib_cq_init_attr cq_attr = {}; + const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; @@ -1996,12 +2005,13 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_out_qp0; } - cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; + cq_size = 2 * nmbr_bufs; if (ctx->has_smi) cq_size *= 2; cq_attr.cqe = cq_size; - ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + ctx->cq = ib_create_cq(ctx->ib_dev, + create_tun ? 
mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler, NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); @@ -2038,6 +2048,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq; ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); if (ret) { @@ -2181,7 +2192,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_mcg; } - snprintf(name, sizeof name, "mlx4_ibt%d", port); + snprintf(name, sizeof(name), "mlx4_ibt%d", port); ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); @@ -2189,7 +2200,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_wq; } - snprintf(name, sizeof name, "mlx4_ibud%d", port); + snprintf(name, sizeof(name), "mlx4_ibwi%d", port); + ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (!ctx->wi_wq) { + pr_err("Failed to create wire WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wiwq; + } + + snprintf(name, sizeof(name), "mlx4_ibud%d", port); ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); @@ -2200,6 +2219,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, return 0; err_udwq: + destroy_workqueue(ctx->wi_wq); + ctx->wi_wq = NULL; + +err_wiwq: destroy_workqueue(ctx->wq); ctx->wq = NULL; @@ -2247,12 +2270,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; } flush_workqueue(ctx->wq); + flush_workqueue(ctx->wi_wq); for (i = 0; i < dev->dev->caps.sqp_demux; i++) { destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); free_pv_object(dev, i, ctx->port); } kfree(ctx->tun); destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wi_wq); destroy_workqueue(ctx->wq); } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bd4f975e7f9a..e3cd402c079a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1215,9 +1215,10 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); + return 0; } static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) @@ -1256,11 +1257,12 @@ err2: return err; } -static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) @@ -1533,23 +1535,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att struct mlx4_net_trans_rule_hw_ctrl *ctrl; int default_flow; - static const u16 __mlx4_domain[] = { - [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, - [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, - [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, - [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, - }; - if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { pr_err("Invalid priority value %d\n", flow_attr->priority); 
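Note: the wi_wq hunks above give the wire (non-tunnel) special QPs their own ordered workqueue so their completion processing no longer shares the tunnel queue. A sketch of the general wiring with hypothetical demo_* names; the comp handler only queues work, the worker does the CQ polling:

#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>

struct demo_ctx {
	struct ib_cq *cq;
	struct workqueue_struct *wi_wq;
	struct work_struct work;
};

/* CQ completion callback: defer the real work to the dedicated queue. */
static void demo_wire_comp_handler(struct ib_cq *cq, void *arg)
{
	struct demo_ctx *ctx = cq->cq_context;

	queue_work(ctx->wi_wq, &ctx->work);
}

/* Worker: drain the CQ, then re-arm it. */
static void demo_wire_comp_worker(struct work_struct *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, work);
	struct ib_wc wc;

	while (ib_poll_cq(ctx->cq, 1, &wc) == 1)
		; /* handle wc.opcode / wc.status here */
	ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
}

static int demo_ctx_init(struct demo_ctx *ctx, int port)
{
	INIT_WORK(&ctx->work, demo_wire_comp_worker);
	ctx->wi_wq = alloc_ordered_workqueue("demo_wi%d", WQ_MEM_RECLAIM, port);
	if (!ctx->wi_wq)
		return -ENOMEM;
	return 0;
}

static void demo_ctx_fini(struct demo_ctx *ctx)
{
	flush_workqueue(ctx->wi_wq);
	destroy_workqueue(ctx->wi_wq);
}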
return -EINVAL; } - if (domain >= IB_FLOW_DOMAIN_NUM) { - pr_err("Invalid domain value %d\n", domain); - return -EINVAL; - } - if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) return -EINVAL; @@ -1558,8 +1548,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att return PTR_ERR(mailbox); ctrl = mailbox->buf; - ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | - flow_attr->priority); + ctrl->prio = cpu_to_be16(domain | flow_attr->priority); ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type); ctrl->port = flow_attr->port; ctrl->qpn = cpu_to_be32(qp->qp_num); @@ -1701,8 +1690,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, } static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata) + struct ib_flow_attr *flow_attr, + struct ib_udata *udata) { int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; @@ -1768,8 +1757,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, } while (i < ARRAY_SIZE(type) && type[i]) { - err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], - &mflow->reg_id[i].id); + err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS, + type[i], &mflow->reg_id[i].id); if (err) goto err_create_flow; if (is_bonded) { @@ -1778,7 +1767,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, */ flow_attr->port = 2; err = __mlx4_ib_create_flow(qp, flow_attr, - domain, type[j], + MLX4_DOMAIN_UVERBS, type[j], &mflow->reg_id[j].mirror); flow_attr->port = 1; if (err) @@ -2035,7 +2024,8 @@ static ssize_t hca_type_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); + + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2044,7 +2034,8 @@ static ssize_t hw_rev_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->dev->rev_id); + + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2054,8 +2045,7 @@ static ssize_t board_id_show(struct device *device, struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, - dev->dev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -2275,10 +2265,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, u64 release_mac = MLX4_IB_INVALID_MAC; struct mlx4_ib_qp *qp; - read_lock(&dev_base_lock); new_smac = mlx4_mac_to_u64(dev->dev_addr); - read_unlock(&dev_base_lock); - atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); /* no need for update QP1 and mac registration in non-SRIOV */ @@ -2589,11 +2576,16 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = { .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, .destroy_wq = mlx4_ib_destroy_wq, .modify_wq = mlx4_ib_modify_wq, + + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table, + ib_rwq_ind_tbl), }; static const struct ib_device_ops mlx4_ib_dev_mw_ops = { .alloc_mw = mlx4_ib_alloc_mw, .dealloc_mw = mlx4_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw), }; static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { @@ -2663,73 +2655,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = 
dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == - IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + IB_LINK_LAYER_ETHERNET))) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || - dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } @@ -2847,7 +2791,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_free_bitmap; rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group); - if (ib_register_device(&ibdev->ib_dev, "mlx4_%d")) + if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", + &dev->persist->pdev->dev)) goto err_diag_counters; if (mlx4_ib_mad_init(ibdev)) @@ -2989,10 +2934,8 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, /* Add an empty rule for IB L2 */ memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); - err = __mlx4_ib_create_flow(&mqp->ibqp, flow, - IB_FLOW_DOMAIN_NIC, - MLX4_FS_REGULAR, - &mqp->reg_id); + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC, + 
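Note: both register paths in this series now hand the PCI device to ib_register_device() as the DMA device and lift the DMA segment-size cap first. A sketch of that registration step, assuming a hypothetical demo_register() and an already initialised ib_device:

#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <rdma/ib_verbs.h>

static int demo_register(struct ib_device *ibdev, struct pci_dev *pdev)
{
	/* RDMA umems can span large contiguous ranges; don't cap SG segments. */
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);

	/* The third argument names the device used for all DMA mappings. */
	return ib_register_device(ibdev, "demo%d", &pdev->dev);
}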
MLX4_FS_REGULAR, &mqp->reg_id); } else { err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); } diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 5e4ec9786081..33f525b744f2 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, } static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct mcast_group *group = container_of(attr, struct mcast_group, dentry); struct mcast_req *req = NULL; - char pending_str[40]; char state_str[40]; - ssize_t len = 0; - int f; + char pending_str[40]; + int len; + int i; + u32 hoplimit; if (group->state == MCAST_IDLE) - sprintf(state_str, "%s", get_state_string(group->state)); + scnprintf(state_str, sizeof(state_str), "%s", + get_state_string(group->state)); else - sprintf(state_str, "%s(TID=0x%llx)", - get_state_string(group->state), - be64_to_cpu(group->last_req_tid)); + scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)", + get_state_string(group->state), + be64_to_cpu(group->last_req_tid)); + if (list_empty(&group->pending_list)) { - sprintf(pending_str, "No"); + scnprintf(pending_str, sizeof(pending_str), "No"); } else { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - sprintf(pending_str, "Yes(TID=0x%llx)", - be64_to_cpu(req->sa_mad.mad_hdr.tid)); + req = list_first_entry(&group->pending_list, struct mcast_req, + group_list); + scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)", + be64_to_cpu(req->sa_mad.mad_hdr.tid)); } - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", - group->rec.scope_join_state & 0xf, - group->members[2], group->members[1], group->members[0], - atomic_read(&group->refcount), - pending_str, - state_str); - for (f = 0; f < MAX_VFS; ++f) - if (group->func[f].state == MCAST_MEMBER) - len += sprintf(buf + len, "%d[%1x] ", - f, group->func[f].join_state); - - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " - "%4x %4x %2x %2x)\n", - be16_to_cpu(group->rec.pkey), - be32_to_cpu(group->rec.qkey), - (group->rec.mtusel_mtu & 0xc0) >> 6, - group->rec.mtusel_mtu & 0x3f, - group->rec.tclass, - (group->rec.ratesel_rate & 0xc0) >> 6, - group->rec.ratesel_rate & 0x3f, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, - group->rec.proxy_join); + + len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ", + group->rec.scope_join_state & 0xf, + group->members[2], + group->members[1], + group->members[0], + atomic_read(&group->refcount), + pending_str, + state_str); + + for (i = 0; i < MAX_VFS; i++) { + if (group->func[i].state == MCAST_MEMBER) + len += sysfs_emit_at(buf, len, "%d[%1x] ", i, + group->func[i].join_state); + } + + hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit); + len += sysfs_emit_at(buf, len, + "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n", + be16_to_cpu(group->rec.pkey), + be32_to_cpu(group->rec.qkey), + (group->rec.mtusel_mtu & 0xc0) >> 6, + (group->rec.mtusel_mtu & 0x3f), + group->rec.tclass, + (group->rec.ratesel_rate & 0xc0) >> 6, + (group->rec.ratesel_rate & 0x3f), + (hoplimit & 0xf0000000) >> 28, + (hoplimit & 0x0fffff00) >> 8, + (hoplimit & 0x000000ff), + group->rec.proxy_join); return len; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h 
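Note: the sysfs show() conversions above (hw_rev, hca_type, board_id, and the multi-line mcg group dump) replace sprintf() with sysfs_emit()/sysfs_emit_at(), which keep writes inside the single PAGE_SIZE sysfs buffer. A minimal sketch with a hypothetical attribute:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	int len;

	/* First chunk starts at offset 0 ... */
	len = sysfs_emit(buf, "demo device\n");
	/* ... later chunks append at the current length. */
	len += sysfs_emit_at(buf, len, "revision %x\n", 0x1a);

	return len;
}
static DEVICE_ATTR_RO(demo);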
b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 38e87a700a2a..78c9bb79ec75 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags { }; enum { - MLX4_NUM_TUNNEL_BUFS = 256, + MLX4_NUM_TUNNEL_BUFS = 512, + MLX4_NUM_WIRE_BUFS = 2048, }; struct mlx4_ib_tunnel_header { @@ -298,6 +299,26 @@ struct mlx4_ib_rss { u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; }; +enum { + /* + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) + */ + MLX4_IB_UD_HEADER_SIZE = 82, + MLX4_IB_LSO_HEADER_SPARE = 128, +}; + +struct mlx4_ib_sqp { + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; + struct ib_qp *roce_v2_gsi; +}; + struct mlx4_ib_qp { union { struct ib_qp ibqp; @@ -343,7 +364,10 @@ struct mlx4_ib_qp { struct mlx4_wqn_range *wqn_range; /* Number of RSS QP parents that uses this WQ */ u32 rss_usecnt; - struct mlx4_ib_rss *rss_ctx; + union { + struct mlx4_ib_rss *rss_ctx; + struct mlx4_ib_sqp *sqp; + }; }; struct mlx4_ib_srq { @@ -366,6 +390,10 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +struct mlx4_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_rwq_ind_tbl; +}; + /****************************************/ /* alias guid support */ /****************************************/ @@ -454,6 +482,7 @@ struct mlx4_ib_demux_pv_ctx { struct ib_pd *pd; struct work_struct work; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct mlx4_ib_demux_pv_qp qp[2]; }; @@ -461,6 +490,7 @@ struct mlx4_ib_demux_ctx { struct ib_device *ib_dev; int port; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; atomic64_t subnet_prefix; @@ -492,6 +522,7 @@ struct mlx4_ib_sriov { spinlock_t id_map_lock; struct rb_root sl_id_map; struct list_head cm_list; + struct xarray xa_rej_tmout; }; struct gid_cache_context { @@ -725,8 +756,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -736,7 +766,7 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); @@ -747,14 +777,17 @@ int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 
flags); +static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); @@ -875,10 +908,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, const struct ib_gid_attr *attr); @@ -890,15 +923,18 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); -struct ib_rwq_ind_table -*mlx4_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); -int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); +int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +static inline int +mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) +{ + return 0; +} int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 1d5ef0de12c9..50becc0e4b62 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -271,6 +271,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, u64 total_len = 0; int i; + *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { /* * Initialization - save the first chunk start as the @@ -421,7 +423,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_free; } - n = ib_umem_page_count(mr->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, @@ -455,10 +456,10 @@ err_free: return ERR_PTR(err); } -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct 
ib_pd *pd, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(mr->device); struct mlx4_ib_mr *mmr = to_mmr(mr); @@ -471,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, * race exists. */ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); - if (err) - return err; + return ERR_PTR(err); if (flags & IB_MR_REREG_PD) { err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, @@ -511,7 +511,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mmr->umem = NULL; goto release_mpt_entry; } - n = ib_umem_page_count(mmr->umem); + n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE); shift = PAGE_SHIFT; err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, @@ -541,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, release_mpt_entry: mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); - - return err; + if (err) + return ERR_PTR(err); + return NULL; } static int @@ -610,37 +611,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); - struct mlx4_ib_mw *mw; + struct mlx4_ib_dev *dev = to_mdev(ibmw->device); + struct mlx4_ib_mw *mw = to_mmw(ibmw); int err; - mw = kmalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) - return ERR_PTR(-ENOMEM); - - err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, - to_mlx4_type(type), &mw->mmw); + err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn, + to_mlx4_type(ibmw->type), &mw->mmw); if (err) - goto err_free; + return err; err = mlx4_mw_enable(dev->dev, &mw->mmw); if (err) goto err_mw; - mw->ibmw.rkey = mw->mmw.key; - - return &mw->ibmw; + ibmw->rkey = mw->mmw.key; + return 0; err_mw: mlx4_mw_free(dev->dev, &mw->mmw); - -err_free: - kfree(mw); - - return ERR_PTR(err); + return err; } int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) @@ -648,8 +639,6 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) struct mlx4_ib_mw *mw = to_mmw(ibmw); mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); - kfree(mw); - return 0; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2975f350b9fd..651785bd57f2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -68,27 +68,6 @@ enum { }; enum { - /* - * Largest possible UD header: send with GRH and immediate - * data plus 18 bytes for an Ethernet header with VLAN/802.1Q - * tag. 
(LRH would only use 8 bytes, so Ethernet is the - * biggest case) - */ - MLX4_IB_UD_HEADER_SIZE = 82, - MLX4_IB_LSO_HEADER_SPARE = 128, -}; - -struct mlx4_ib_sqp { - struct mlx4_ib_qp qp; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; - struct ib_qp *roce_v2_gsi; -}; - -enum { MLX4_IB_MIN_SQ_STRIDE = 6, MLX4_IB_CACHE_LINE_SIZE = 64, }; @@ -123,11 +102,6 @@ enum mlx4_ib_source_type { MLX4_IB_RWQ_SRC = 1, }; -static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) -{ - return container_of(mqp, struct mlx4_ib_sqp, qp); -} - static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { if (!mlx4_is_master(dev->dev)) @@ -656,8 +630,6 @@ static int create_qp_rss(struct mlx4_ib_dev *dev, if (err) goto err_qpn; - mutex_init(&qp->mutex); - INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); @@ -696,80 +668,72 @@ err_qpn: return err; } -static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_qp *qp; struct mlx4_ib_create_qp_rss ucmd = {}; size_t required_cmd_sz; int err; if (!udata) { pr_debug("RSS QP with NULL udata\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (udata->outlen) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); if (udata->inlen < required_cmd_sz) { pr_debug("invalid inlen\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { pr_debug("copy failed\n"); - return ERR_PTR(-EFAULT); + return -EFAULT; } if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved))) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (ucmd.comp_mask || ucmd.reserved1) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), udata->inlen - sizeof(ucmd))) { pr_debug("inlen is not supported\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->qp_type != IB_QPT_RAW_PACKET) { pr_debug("RSS QP with unsupported QP type %d\n", init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->create_flags) { pr_debug("RSS QP doesn't support create flags\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->send_cq || init_attr->cap.max_send_wr) { pr_debug("RSS QP with unsupported send attributes\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + if (err) + return err; qp->ibqp.qp_num = qp->mqp.qpn; - - return &qp->ibqp; + return 0; } /* @@ -873,7 +837,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); @@ -922,7 +885,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, goto err; } - n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); @@ -989,13 
+951,11 @@ err: static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, - struct mlx4_ib_qp **caller_qp) + struct mlx4_ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(pd->device); int qpn; int err; - struct mlx4_ib_sqp *sqp = NULL; - struct mlx4_ib_qp *qp; struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx4_ib_ucontext, ibucontext); enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; @@ -1043,27 +1003,18 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, sqpn = qpn; } - if (!*caller_qp) { - if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || - (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); - if (!sqp) - return -ENOMEM; - qp = &sqp->qp; - } else { - qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); - if (!qp) - return -ENOMEM; - } - qp->pri.vid = 0xFFFF; - qp->alt.vid = 0xFFFF; - } else - qp = *caller_qp; + if (init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI || + qp_type == MLX4_IB_QPT_GSI || + (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { + qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); + if (!qp->sqp) + return -ENOMEM; + } qp->mlx4_ib_qp_type = qp_type; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); @@ -1117,7 +1068,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, goto err; } - n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); @@ -1239,9 +1189,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->mqp.event = mlx4_ib_qp_event; - if (!*caller_qp) - *caller_qp = qp; - spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq)); @@ -1293,10 +1240,7 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: - if (!sqp && !*caller_qp) - kfree(qp); - kfree(sqp); - + kfree(qp->sqp); return err; } @@ -1410,7 +1354,6 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) mlx4_qp_free(dev->dev, &qp->mqp); mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); del_gid_entries(qp); - kfree(qp->rss_ctx); } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, @@ -1529,17 +1472,16 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy; } -static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_qp *qp = NULL; int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; if (init_attr->rwq_ind_tbl) - return _mlx4_ib_create_qp_rss(pd, init_attr, udata); + return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata); /* * We only support LSO, vendor flag1, and multicast loopback blocking, @@ -1551,16 +1493,16 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | 
MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return ERR_PTR(-EINVAL); + return -EOPNOTSUPP; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (init_attr->create_flags) { if (udata && init_attr->create_flags & ~(sup_u_create_flags)) - return ERR_PTR(-EINVAL); + return -EINVAL; if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_ROCE_V2_GSI | @@ -1570,7 +1512,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, init_attr->qp_type > IB_QPT_GSI) || (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI && init_attr->qp_type != IB_QPT_GSI)) - return ERR_PTR(-EINVAL); + return -EINVAL; } switch (init_attr->qp_type) { @@ -1581,53 +1523,48 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, fallthrough; case IB_QPT_XRC_INI: if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) - return ERR_PTR(-ENOSYS); + return -ENOSYS; init_attr->recv_cq = init_attr->send_cq; fallthrough; case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + case IB_QPT_UD: qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; - fallthrough; - case IB_QPT_UD: - { - err = create_qp_common(pd, init_attr, udata, 0, &qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + err = create_qp_common(pd, init_attr, udata, 0, qp); + if (err) + return err; qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; - break; - } case IB_QPT_SMI: case IB_QPT_GSI: { int sqpn; - /* Userspace is not allowed to create special QPs: */ - if (udata) - return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0, MLX4_RES_USAGE_DRIVER); if (res) - return ERR_PTR(res); + return res; } else { sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(pd, init_attr, udata, sqpn, &qp); + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + err = create_qp_common(pd, init_attr, udata, sqpn, qp); if (err) - return ERR_PTR(err); + return err; + + if (init_attr->create_flags & + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) + /* Internal QP created with ib_create_qp */ + rdma_restrack_no_track(&qp->ibqp.res); qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : @@ -1636,25 +1573,33 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } default: /* Don't support raw QPs */ - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } - - return &qp->ibqp; + return 0; } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct ib_device *device = pd ? 
pd->device : init_attr->xrcd->device; - struct ib_qp *ibqp; struct mlx4_ib_dev *dev = to_mdev(device); + struct mlx4_ib_qp *qp; + int ret; - ibqp = _mlx4_ib_create_qp(pd, init_attr, udata); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); - if (!IS_ERR(ibqp) && - (init_attr->qp_type == IB_QPT_GSI) && + mutex_init(&qp->mutex); + ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata); + if (ret) { + kfree(qp); + return ERR_PTR(ret); + } + + if (init_attr->qp_type == IB_QPT_GSI && !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) { - struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp))); + struct mlx4_ib_sqp *sqp = qp->sqp; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num); if (is_eth && @@ -1666,14 +1611,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi)); sqp->roce_v2_gsi = NULL; } else { - sqp = to_msqp(to_mqp(sqp->roce_v2_gsi)); - sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP; + to_mqp(sqp->roce_v2_gsi)->flags |= + MLX4_IB_ROCE_V2_GSI_QP; } init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI; } } - return ibqp; + return &qp->ibqp; } static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) @@ -1700,10 +1645,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata); } - if (is_sqp(dev, mqp)) - kfree(to_msqp(mqp)); - else - kfree(mqp); + kfree(mqp->sqp); + kfree(mqp); return 0; } @@ -1713,7 +1656,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx4_ib_qp *mqp = to_mqp(qp); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(mqp); + struct mlx4_ib_sqp *sqp = mqp->sqp; if (sqp->roce_v2_gsi) ib_destroy_qp(sqp->roce_v2_gsi); @@ -2575,7 +2518,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) - store_sqp_attrs(to_msqp(qp), attr, attr_mask); + store_sqp_attrs(qp->sqp, attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved @@ -2849,10 +2792,13 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *mqp = to_mqp(ibqp); int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(mqp); + struct mlx4_ib_sqp *sqp = mqp->sqp; int err = 0; if (sqp->roce_v2_gsi) @@ -2877,12 +2823,13 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) return -EINVAL; } -static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, +static int build_sriov_qp0_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); - struct ib_device *ib_dev = &mdev->ib_dev; + struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device); + struct mlx4_ib_sqp *sqp = qp->sqp; + struct ib_device *ib_dev = qp->ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->ah); @@ -2904,12 +2851,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) 
send_size += sizeof (struct mlx4_ib_tunnel_header); ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header); - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = @@ -2926,26 +2873,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey); if (err) return err; sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = - cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel); + cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_is_master(mdev->dev)) { - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey)) return -EINVAL; } else { - if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey)) return -EINVAL; } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn); sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -3029,10 +2976,11 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, } #define MLX4_ROCEV2_QP1_SPORT 0xC000 -static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, +static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct ib_device *ib_dev = sqp->qp.ibqp.device; + struct mlx4_ib_sqp *sqp = qp->sqp; + struct ib_device *ib_dev = qp->ibqp.device; struct mlx4_ib_dev *ibdev = to_mdev(ib_dev); struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; @@ -3056,7 +3004,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, for (i = 0; i < wr->wr.num_sge; ++i) send_size += wr->wr.sg_list[i].length; - is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { enum ib_gid_type gid_type; @@ -3070,9 +3018,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, if (err) return err; } else { - err = fill_gid_by_hw_index(ibdev, sqp->qp.port, - ah->av.ib.gid_index, - &sgid, &gid_type); + err = fill_gid_by_hw_index(ibdev, qp->port, + ah->av.ib.gid_index, &sgid, + &gid_type); if (!err) { is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { @@ -3117,13 +3065,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, * indexes don't necessarily match the hw ones, so * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. - demux[sqp->qp.port - 1]. 
- subnet_prefix))); - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; + sqp->ud_header.grh.source_gid.global + .subnet_prefix = + cpu_to_be64(atomic64_read( + &(to_mdev(ib_dev) + ->sriov + .demux[qp->port - 1] + .subnet_prefix))); + sqp->ud_header.grh.source_gid.global + .interface_id = + to_mdev(ib_dev) + ->sriov.demux[qp->port - 1] + .guid_cache[ah->av.ib.gid_index]; } else { sqp->ud_header.grh.source_gid = ah->ibah.sgid_attr->gid; @@ -3155,10 +3108,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); if (!is_eth) { - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); + mlx->flags |= + cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? + MLX4_WQE_MLX_SLR : + 0) | + (sqp->ud_header.lrh.service_level << 8)); if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; @@ -3204,21 +3160,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : - sl_to_vl(to_mdev(ib_dev), - sqp->ud_header.lrh.service_level, - sqp->qp.port); - if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) + sqp->ud_header.lrh.virtual_lane = + !qp->ibqp.qp_num ? + 15 : + sl_to_vl(to_mdev(ib_dev), + sqp->ud_header.lrh.service_level, + qp->port); + if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - if (!sqp->qp.ibqp.qp_num) - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, + if (!qp->ibqp.qp_num) + err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index, &pkey); else - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, + err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index, &pkey); if (err) return err; @@ -3228,7 +3186,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? sqp->qkey : wr->remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); @@ -3551,14 +3509,14 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(qp); + struct mlx4_ib_sqp *sqp = qp->sqp; if (sqp->roce_v2_gsi) { struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); enum ib_gid_type gid_type; union ib_gid gid; - if (!fill_gid_by_hw_index(mdev, sqp->qp.port, + if (!fill_gid_by_hw_index(mdev, qp->port, ah->av.ib.gid_index, &gid, &gid_type)) qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 
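The special-QP conversion running through the qp.c hunks above follows one pattern: struct mlx4_ib_sqp no longer embeds the QP and is no longer recovered with to_msqp()/container_of(); instead struct mlx4_ib_qp carries an optional sqp pointer (in a union with rss_ctx) that is allocated only for SMI/GSI/proxy QPs, which is why every sqp->qp.X access becomes qp->X and the helpers now take the QP itself. A simplified sketch of that layout change, using abbreviated hypothetical types rather than the driver's real structures:

	#include <linux/kernel.h>

	/* old shape: the special-QP state wrapped the QP, recovered by cast */
	struct old_sqp {
		struct old_qp { int port; } qp;		/* embedded QP */
		u32 send_psn;
	};
	#define to_old_msqp(mqp) container_of(mqp, struct old_sqp, qp)

	/* new shape: the QP optionally owns the special-QP state as a side
	 * allocation (kzalloc'ed only for SMI/GSI/proxy QPs, NULL otherwise) */
	struct new_qp {
		int port;
		union {
			void *rss_ctx;			/* RSS parent QPs */
			struct new_sqp {
				u32 send_psn;
			} *sqp;				/* special QPs */
		};
	};

Because the pointer is NULL for ordinary QPs, the destroy path can simply kfree(qp->sqp) unconditionally, as the _mlx4_ib_destroy_qp() hunk above does.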
@@ -3678,8 +3636,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), - ctrl, &seglen); + err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -3715,8 +3673,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), - ctrl, &seglen); + err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -3749,8 +3707,7 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, - &seglen); + err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -4058,7 +4015,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; @@ -4172,6 +4131,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); + mutex_init(&qp->mutex); qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; @@ -4327,7 +4287,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, return err; } -void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibwq->device); struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); @@ -4338,36 +4298,35 @@ void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata); kfree(qp); + return 0; } -struct ib_rwq_ind_table -*mlx4_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) { - struct ib_rwq_ind_table *rwq_ind_table; struct mlx4_ib_create_rwq_ind_tbl_resp resp = {}; unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size; + struct ib_device *device = rwq_ind_table->device; unsigned int base_wqn; size_t min_resp_len; - int i; - int err; + int i, err = 0; if (udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ind_tbl_size > device->attrs.rss_caps.max_rwq_indirection_table_size) { pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n", ind_tbl_size, device->attrs.rss_caps.max_rwq_indirection_table_size); - return ERR_PTR(-EINVAL); + return -EINVAL; } base_wqn = init_attr->ind_tbl[0]->wq_num; @@ -4375,39 +4334,23 @@ struct ib_rwq_ind_table if (base_wqn % ind_tbl_size) { pr_debug("WQN=0x%x isn't aligned with 
indirection table size\n", base_wqn); - return ERR_PTR(-EINVAL); + return -EINVAL; } for (i = 1; i < ind_tbl_size; i++) { if (++base_wqn != init_attr->ind_tbl[i]->wq_num) { pr_debug("indirection table's WQNs aren't consecutive\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL); - if (!rwq_ind_table) - return ERR_PTR(-ENOMEM); - if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); - if (err) - goto err; } - return rwq_ind_table; - -err: - kfree(rwq_ind_table); - return ERR_PTR(err); -} - -int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) -{ - kfree(ib_rwq_ind_tbl); - return 0; + return err; } struct mlx4_ib_drain_cqe { diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 8f9d5035142d..6a381751c0d8 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, int err; int i; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) @@ -115,8 +119,9 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), - PAGE_SHIFT, &srq->mtt); + err = mlx4_mtt_init( + dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE), + PAGE_SHIFT, &srq->mtt); if (err) goto err_buf; @@ -260,7 +265,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return 0; } -void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); @@ -282,6 +287,7 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mlx4_db_free(dev->dev, &msrq->db); } ib_umem_release(msrq->umem); + return 0; } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index ea1f3a081b05..1b5891130aab 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev, mlx4_ib_iov_dentry->entry_num, port->num); - return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); + return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); } /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. 
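The sysfs changes in this file (and the mcg.c hunk earlier) are all the same conversion: show() callbacks format the PAGE_SIZE sysfs buffer with sysfs_emit()/sysfs_emit_at() instead of open-coded sprintf(), so the write is bounds-checked and the return value is always the emitted length. A minimal sketch of the pattern, with a hypothetical attribute and fields that are not taken from this patch:

	#include <linux/device.h>
	#include <linux/sysfs.h>

	struct example_port { int first; int second; };	/* hypothetical */

	static ssize_t example_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		struct example_port *p = dev_get_drvdata(dev);
		int len;

		/* sysfs_emit() writes at offset 0 and refuses to overrun PAGE_SIZE */
		len = sysfs_emit(buf, "%d", p->first);

		/* sysfs_emit_at() appends at a byte offset, again bounds-checked */
		len += sysfs_emit_at(buf, len, " %d\n", p->second);

		return len;
	}

sysfs_emit() and sysfs_emit_at() return an int byte count, which is also why the local ssize_t len/ret variables in these hunks become plain int.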
@@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev, struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; union ib_gid gid; - ssize_t ret; + int ret; + __be16 *raw; ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) gid.raw)[0]), - be16_to_cpu(((__be16 *) gid.raw)[1]), - be16_to_cpu(((__be16 *) gid.raw)[2]), - be16_to_cpu(((__be16 *) gid.raw)[3]), - be16_to_cpu(((__be16 *) gid.raw)[4]), - be16_to_cpu(((__be16 *) gid.raw)[5]), - be16_to_cpu(((__be16 *) gid.raw)[6]), - be16_to_cpu(((__be16 *) gid.raw)[7])); - return ret; + + raw = (__be16 *)gid.raw; + return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(raw[0]), + be16_to_cpu(raw[1]), + be16_to_cpu(raw[2]), + be16_to_cpu(raw[3]), + be16_to_cpu(raw[4]), + be16_to_cpu(raw[5]), + be16_to_cpu(raw[6]), + be16_to_cpu(raw[7])); } static ssize_t show_phys_port_pkey(struct device *dev, @@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev, if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define DENTRY_REMOVE(_dentry) \ @@ -441,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - ssize_t ret = -ENODEV; - - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= - (p->dev->dev->caps.pkey_table_len[p->port_num])) - ret = sprintf(buf, "none\n"); - else - ret = sprintf(buf, "%d\n", - p->dev->pkeys.virt2phys_pkey[p->slave] - [p->port_num - 1][tab_attr->index]); - return ret; + struct pkey_mgt *m = &p->dev->pkeys; + u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index]; + + if (key >= p->dev->dev->caps.pkey_table_len[p->port_num]) + return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%d\n", key); } static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, @@ -488,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, static ssize_t show_port_gid_idx(struct mlx4_port *p, struct port_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", p->slave); + return sysfs_emit(buf, "%d\n", p->slave); } static struct attribute ** @@ -542,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, smi_enabled); - ssize_t len = 0; - if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_smi_enabled(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_show_enable_smi_admin(struct device *dev, @@ -558,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, enable_smi_admin); - ssize_t len = 0; - - if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_store_enable_smi_admin(struct device *dev, diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 
59e5ec39b447..505bc47fd575 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -106,8 +106,8 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) { int err; struct mlx5_ib_create_ah_resp resp = {}; - u32 min_resp_len = offsetof(typeof(resp), dmac) + - sizeof(resp.dmac); + u32 min_resp_len = + offsetofend(struct mlx5_ib_create_ah_resp, dmac); if (udata->outlen < min_resp_len) return -EINVAL; @@ -147,8 +147,3 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ebb2f108b64f..234f29912ba9 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -168,14 +168,14 @@ void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid) mlx5_cmd_exec_in(dev, destroy_tis, in); } -void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) +int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); MLX5_SET(destroy_rqt_in, in, uid, uid); - mlx5_cmd_exec_in(dev, destroy_rqt, in); + return mlx5_cmd_exec_in(dev, destroy_rqt, in); } int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, @@ -209,14 +209,14 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); } -void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) +int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) { u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); MLX5_SET(dealloc_pd_in, in, pd, pdn); MLX5_SET(dealloc_pd_in, in, uid, uid); - mlx5_cmd_exec_in(dev, dealloc_pd, in); + return mlx5_cmd_exec_in(dev, dealloc_pd, in); } int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1d192a8ca87d..88ea6ef8f2cb 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -44,10 +44,10 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); -void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); +int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); -void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); +int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u16 uid); void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 145f3cb40ccb..084652e2b15a 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -117,7 +117,7 @@ err_bound: return ret; } -static void mlx5_ib_destroy_counters(struct ib_counters *counters) +static 
int mlx5_ib_destroy_counters(struct ib_counters *counters) { struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); @@ -125,6 +125,7 @@ static void mlx5_ib_destroy_counters(struct ib_counters *counters) if (mcounters->hw_cntrs_hndl) mlx5_fc_destroy(to_mdev(counters->device)->mdev, mcounters->hw_cntrs_hndl); + return 0; } static int mlx5_ib_create_counters(struct ib_counters *counters, @@ -137,13 +138,6 @@ static int mlx5_ib_create_counters(struct ib_counters *counters, } -static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) -{ - return MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == - MLX5_ESWITCH_OFFLOADS; -} - static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, u8 port_num) { @@ -456,12 +450,12 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); + sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) goto err_names; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dceb0eb2bed1..eb92cefffd77 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -168,7 +168,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, { enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_srq *srq = NULL; struct mlx5_ib_wq *wq; u16 wqe_ctr; u8 roce_packet_type; @@ -180,7 +180,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, if (qp->ibqp.xrcd) { msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); - srq = to_mibsrq(msrq); + if (msrq) + srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); } @@ -254,7 +255,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, switch (roce_packet_type) { case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: - wc->network_hdr_type = RDMA_NETWORK_IB; + wc->network_hdr_type = RDMA_NETWORK_ROCE_V1; break; case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: wc->network_hdr_type = RDMA_NETWORK_IPV6; @@ -706,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; size_t ucmdlen; - int page_shift; __be64 *pas; - int npages; int ncont; void *cqc; int err; @@ -741,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", 
+ ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_size, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -759,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - page_shift - MLX5_ADAPTER_PAGE_SHIFT); + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; @@ -1023,16 +1035,21 @@ err_cqb: return err; } -void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); + int ret; + + ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); + if (ret) + return ret; - mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); if (udata) destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); + return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) @@ -1122,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, - int entries, struct ib_udata *udata, int *npas, - int *page_shift, int *cqe_size) + int entries, struct ib_udata *udata, + int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; - int npages; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1149,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, - npas, NULL); - cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1244,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int err; int npas; __be64 *pas; - int page_shift; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; int inlen; int cqe_size; unsigned long flags; @@ -1271,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, - &cqe_size); + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); + if (err) + goto ex; + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); } else { + struct mlx5_frag_buf *frag_buf; + cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); - if (!err) { - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - - npas = frag_buf->npages; - page_shift = frag_buf->page_shift; - } + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } - if (err) - goto ex; - inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; @@ -1298,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) pas = 
(__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - pas, 0); + mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, + 0); else mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); @@ -1313,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size, cq->private_flags & diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e3d8b826498..819c142857d6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -93,9 +93,6 @@ struct devx_async_event_file { struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; - u32 page_offset; - int page_shift; - int ncont; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, u64 addr; size_t size; u32 access; - int npages; int err; - u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) @@ -2073,57 +2068,62 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - err = ib_check_mr_access(access); + err = ib_check_mr_access(&dev->ib_dev, access); if (err) return err; obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); - - mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont, NULL); - - if (!npages) { - ib_umem_release(obj->umem); - return -EINVAL; - } - - page_mask = (1 << obj->page_shift) - 1; - obj->page_offset = obj->umem->address & page_mask; - return 0; } -static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, +static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, + struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned int page_size; + __be64 *mtt; + void *umem; + + /* + * We don't know what the user intends to use this umem for, but the HW + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all + * have different requirements. Since we have no idea how to sort this + * out, only support PAGE_SIZE with the expectation that userspace will + * provide the necessary alignments inside the known PAGE_SIZE and that + * FW will check everything. 
+ */ + page_size = ib_umem_find_best_pgoff( + obj->umem, PAGE_SIZE, + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), + 0)); + if (!page_size) + return -EINVAL; + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, page_size)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); - return PTR_ERR_OR_ZERO(cmd->in); -} - -static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, - struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) -{ - void *umem; - __be64 *mtt; + if (IS_ERR(cmd->in)) + return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); - MLX5_SET(umem, umem, log_page_size, obj->page_shift - - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + MLX5_SET64(umem, umem, num_of_mtt, + ib_umem_num_dma_blocks(obj->umem, page_size)); + MLX5_SET(umem, umem, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, + ib_umem_dma_offset(obj->umem, page_size)); + + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | - MLX5_IB_MTT_READ); + MLX5_IB_MTT_READ); + return 0; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( @@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); if (err) goto err_umem_release; - devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); @@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); @@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index e9cfb9a2ef41..25da0b05b4e2 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -136,12 +136,9 @@ static int check_mpls_supp_fields(u32 
field_support, const __be32 *set_mask) #define LAST_COUNTERS_FIELD counters /* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) +#define FIELDS_NOT_SUPPORTED(filter, field) \ + memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \ + sizeof(filter) - offsetofend(typeof(filter), field)) int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, bool is_egress, @@ -767,6 +764,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; + enum mlx5_flow_namespace_type fn_type; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; int max_table_size; @@ -780,11 +778,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: + if (flow_is_multicast_only(flow_attr) && !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, @@ -797,12 +793,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) + reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX( + dev->mdev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; if (!dev->is_rep && !esw_encap && @@ -812,27 +807,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ns = mlx5_get_flow_namespace(dev->mdev, fn_type); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + break; + case IB_FLOW_ATTR_ALL_DEFAULT: + case IB_FLOW_ATTR_MC_DEFAULT: ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); + build_leftovers_ft_param(&priority, &num_entries, &num_groups); prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + break; + case IB_FLOW_ATTR_SNIFFER: if (!MLX5_CAP_FLOWTABLE(dev->mdev, allow_sniffer_and_nic_rx_shared_tir)) return ERR_PTR(-EOPNOTSUPP); - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? - MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); + ns = mlx5_get_flow_namespace( + dev->mdev, ft_type == MLX5_IB_FT_RX ? 
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); prio = &dev->flow_db->sniffer[ft_type]; priority = 0; num_entries = 1; num_groups = 1; + break; + default: + break; } if (!ns) @@ -954,7 +953,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); - if (dev->is_rep) { + if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) { struct mlx5_eswitch_rep *rep; rep = dev->port[flow_attr->port - 1].rep; @@ -1116,6 +1115,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, int err; static const struct ib_flow_attr flow_attr = { .num_of_specs = 0, + .type = IB_FLOW_ATTR_SNIFFER, .size = sizeof(flow_attr) }; @@ -1143,10 +1143,8 @@ err: return ERR_PTR(err); } - static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); @@ -1162,8 +1160,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, int underlay_qpn; if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); + min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved); if (udata->inlen < min_ucmd_sz) return ERR_PTR(-EOPNOTSUPP); @@ -1197,10 +1194,9 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto free_ucmd; } - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { + if (flow_attr->port > dev->num_ports || + (flow_attr->flags & + ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) { err = -EINVAL; goto free_ucmd; } @@ -1245,19 +1241,22 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = mqp->raw_packet_qp.rq.tirn; } - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? 
mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + break; + case IB_FLOW_ATTR_ALL_DEFAULT: + case IB_FLOW_ATTR_MC_DEFAULT: + handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); + break; + case IB_FLOW_ATTR_SNIFFER: handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { + break; + default: err = -EINVAL; goto destroy_ft; } @@ -1305,39 +1304,47 @@ _get_flow_table(struct mlx5_ib_dev *dev, esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); + switch (fs_matcher->ns_type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + break; + case MLX5_FLOW_NAMESPACE_EGRESS: max_table_size = BIT( MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && + !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + break; + case MLX5_FLOW_NAMESPACE_FDB: max_table_size = BIT( MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, + reformat_l3_tunnel_to_l2) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; + break; + default: + break; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); @@ -1346,16 +1353,24 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (!ns) return ERR_PTR(-EOPNOTSUPP); - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + switch (fs_matcher->ns_type) { + case MLX5_FLOW_NAMESPACE_BYPASS: prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + break; + case MLX5_FLOW_NAMESPACE_EGRESS: prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + break; + case MLX5_FLOW_NAMESPACE_FDB: prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + break; + case 
MLX5_FLOW_NAMESPACE_RDMA_RX: prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: prio = &dev->flow_db->rdma_tx[priority]; + break; + default: return ERR_PTR(-EINVAL); + } if (!prio) return ERR_PTR(-EINVAL); @@ -1488,20 +1503,25 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( goto unlock; } - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + switch (dest_type) { + case MLX5_FLOW_DESTINATION_TYPE_TIR: dst[dst_num].type = dest_type; dst[dst_num++].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; dst[dst_num++].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + break; + default: + break; } - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dst[dst_num].counter_id = counter_id; @@ -2015,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 40d418153891..7fcad9135276 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -35,44 +35,19 @@ struct mlx5_ib_gsi_wr { struct ib_cqe cqe; struct ib_wc wc; - int send_flags; bool completed:1; }; -struct mlx5_ib_gsi_qp { - struct ib_qp ibqp; - struct ib_qp *rx_qp; - u8 port_num; - struct ib_qp_cap cap; - enum ib_sig_type sq_sig_type; - /* Serialize qp state modifications */ - struct mutex mutex; - struct ib_cq *cq; - struct mlx5_ib_gsi_wr *outstanding_wrs; - u32 outstanding_pi, outstanding_ci; - int num_qps; - /* Protects access to the tx_qps. Post send operations synchronize - * with tx_qp creation in setup_qp(). Also protects the - * outstanding_wrs array and indices. 
- */ - spinlock_t lock; - struct ib_qp **tx_qps; -}; - -static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) -{ - return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); -} - static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) { return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } /* Call with gsi->lock locked */ -static void generate_completions(struct mlx5_ib_gsi_qp *gsi) +static void generate_completions(struct mlx5_ib_qp *mqp) { - struct ib_cq *gsi_cq = gsi->ibqp.send_cq; + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; + struct ib_cq *gsi_cq = mqp->ibqp.send_cq; struct mlx5_ib_gsi_wr *wr; u32 index; @@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) if (!wr->completed) break; - if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || - wr->send_flags & IB_SEND_SIGNALED) - WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); - + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); wr->completed = false; } @@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) struct mlx5_ib_gsi_qp *gsi = cq->cq_context; struct mlx5_ib_gsi_wr *wr = container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); + struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi); u64 wr_id; unsigned long flags; @@ -106,19 +79,19 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) wr_id = wr->wc.wr_id; wr->wc = *wc; wr->wc.wr_id = wr_id; - wr->wc.qp = &gsi->ibqp; + wr->wc.qp = &mqp->ibqp; - generate_completions(gsi); + generate_completions(mqp); spin_unlock_irqrestore(&gsi->lock, flags); } -struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) +int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, + struct ib_qp_init_attr *attr) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_gsi_qp *gsi; - struct ib_qp_init_attr hw_init_attr = *init_attr; - const u8 port_num = init_attr->port_num; + struct ib_qp_init_attr hw_init_attr = *attr; + const u8 port_num = attr->port_num; int num_qps = 0; int ret; @@ -130,26 +103,19 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, num_qps = MLX5_MAX_PORTS; } - gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); - if (!gsi) - return ERR_PTR(-ENOMEM); - + gsi = &mqp->gsi; gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); - if (!gsi->tx_qps) { - ret = -ENOMEM; - goto err_free; - } + if (!gsi->tx_qps) + return -ENOMEM; - gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, - sizeof(*gsi->outstanding_wrs), - GFP_KERNEL); + gsi->outstanding_wrs = + kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs), + GFP_KERNEL); if (!gsi->outstanding_wrs) { ret = -ENOMEM; goto err_free_tx; } - mutex_init(&gsi->mutex); - mutex_lock(&dev->devr.mutex); if (dev->devr.ports[port_num - 1].gsi) { @@ -161,12 +127,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, gsi->num_qps = num_qps; spin_lock_init(&gsi->lock); - gsi->cap = init_attr->cap; - gsi->sq_sig_type = init_attr->sq_sig_type; - gsi->ibqp.qp_num = 1; + gsi->cap = attr->cap; gsi->port_num = port_num; - gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, + gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0, IB_POLL_SOFTIRQ); if (IS_ERR(gsi->cq)) { mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. 
error %ld\n", @@ -182,19 +146,31 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, hw_init_attr.cap.max_send_sge = 0; hw_init_attr.cap.max_inline_data = 0; } - gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); + + gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL); if (IS_ERR(gsi->rx_qp)) { mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", PTR_ERR(gsi->rx_qp)); ret = PTR_ERR(gsi->rx_qp); goto err_destroy_cq; } + gsi->rx_qp->device = pd->device; + gsi->rx_qp->pd = pd; + gsi->rx_qp->real_qp = gsi->rx_qp; + + gsi->rx_qp->qp_type = hw_init_attr.qp_type; + gsi->rx_qp->send_cq = hw_init_attr.send_cq; + gsi->rx_qp->recv_cq = hw_init_attr.recv_cq; + gsi->rx_qp->event_handler = hw_init_attr.event_handler; + spin_lock_init(&gsi->rx_qp->mr_lock); + INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs); + INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs); - dev->devr.ports[init_attr->port_num - 1].gsi = gsi; + dev->devr.ports[attr->port_num - 1].gsi = gsi; mutex_unlock(&dev->devr.mutex); - return &gsi->ibqp; + return 0; err_destroy_cq: ib_free_cq(gsi->cq); @@ -203,23 +179,19 @@ err_free_wrs: kfree(gsi->outstanding_wrs); err_free_tx: kfree(gsi->tx_qps); -err_free: - kfree(gsi); - return ERR_PTR(ret); + return ret; } -int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp) { - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; const int port_num = gsi->port_num; int qp_index; int ret; - mlx5_ib_dbg(dev, "destroying GSI QP\n"); - mutex_lock(&dev->devr.mutex); - ret = ib_destroy_qp(gsi->rx_qp); + ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL); if (ret) { mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. 
error %d\n", ret); @@ -241,7 +213,7 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) kfree(gsi->outstanding_wrs); kfree(gsi->tx_qps); - kfree(gsi); + kfree(mqp); return 0; } @@ -259,7 +231,6 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) .max_send_sge = gsi->cap.max_send_sge, .max_inline_data = gsi->cap.max_inline_data, }, - .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; @@ -370,56 +341,54 @@ err_destroy_qp: static void setup_qps(struct mlx5_ib_gsi_qp *gsi) { + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); u16 qp_index; + mutex_lock(&dev->devr.mutex); for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) setup_qp(gsi, qp_index); + mutex_unlock(&dev->devr.mutex); } int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask) { struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; int ret; mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); - mutex_lock(&gsi->mutex); ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); if (ret) { mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); - goto unlock; + return ret; } if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) setup_qps(gsi); - -unlock: - mutex_unlock(&gsi->mutex); - - return ret; + return 0; } int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; int ret; - mutex_lock(&gsi->mutex); ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); qp_init_attr->cap = gsi->cap; - mutex_unlock(&gsi->mutex); - return ret; } /* Call with gsi->lock locked */ -static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, +static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr, struct ib_wc *wc) { + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); struct mlx5_ib_gsi_wr *gsi_wr; @@ -448,22 +417,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, } /* Call with gsi->lock locked */ -static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, - struct ib_ud_wr *wr) +static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr) { struct ib_wc wc = { { .wr_id = wr->wr.wr_id }, .status = IB_WC_SUCCESS, .opcode = IB_WC_SEND, - .qp = &gsi->ibqp, + .qp = &mqp->ibqp, }; int ret; - ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); + ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc); if (ret) return ret; - generate_completions(gsi); + generate_completions(mqp); return 0; } @@ -490,7 +458,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; struct ib_qp *tx_qp; unsigned long flags; int ret; @@ -503,14 +472,14 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, spin_lock_irqsave(&gsi->lock, flags); tx_qp = get_tx_qp(gsi, &cur_wr); if (!tx_qp) { - ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); + ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr); if (ret) goto err; spin_unlock_irqrestore(&gsi->lock, flags); continue; } - ret = 
mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); + ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL); if (ret) goto err; @@ -534,7 +503,8 @@ err: int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; return ib_post_recv(gsi->rx_qp, wr, bad_wr); } @@ -544,7 +514,5 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) if (!gsi) return; - mutex_lock(&gsi->mutex); setup_qps(gsi); - mutex_unlock(&gsi->mutex); } diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 5c3d052ac30b..9164cc069ad4 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -13,7 +13,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct mlx5_ib_dev *ibdev; int vport_index; - ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch); + ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); vport_index = rep->vport_index; ibdev->port[vport_index].rep = rep; @@ -33,6 +33,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; int vport_index; + int ret; if (rep->vport == MLX5_VPORT_UPLINK) profile = &raw_eth_profile; @@ -46,8 +47,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port), GFP_KERNEL); if (!ibdev->port) { - ib_dealloc_device(&ibdev->ib_dev); - return -ENOMEM; + ret = -ENOMEM; + goto fail_port; } ibdev->is_rep = true; @@ -58,12 +59,24 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = num_ports; - if (!__mlx5_ib_add(ibdev, profile)) - return -EINVAL; + ret = __mlx5_ib_add(ibdev, profile); + if (ret) + goto fail_add; rep->rep_data[REP_IB].priv = ibdev; return 0; + +fail_add: + kfree(ibdev->port); +fail_port: + ib_dealloc_device(&ibdev->ib_dev); + return ret; +} + +static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) +{ + return rep->rep_data[REP_IB].priv; } static void @@ -83,59 +96,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } -static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) -{ - return mlx5_ib_rep_to_dev(rep); -} - static const struct mlx5_eswitch_rep_ops rep_ops = { .load = mlx5_ib_vport_rep_load, .unload = mlx5_ib_vport_rep_unload, - .get_proto_dev = mlx5_ib_vport_get_proto_dev, + .get_proto_dev = mlx5_ib_rep_to_dev, }; -void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - - mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); -} - -void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - - mlx5_eswitch_unregister_vport_reps(esw, REP_IB); -} - -u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) -{ - return mlx5_eswitch_mode(esw); -} - -struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB); -} - struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num) { return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH); } -struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) -{ - return 
mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB); -} - -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return mlx5_eswitch_vport_rep(esw, vport_num); -} - struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u16 port) @@ -154,3 +126,49 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport, sq->base.mqp.qpn); } + +static int mlx5r_rep_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); + return 0; +} + +static void mlx5r_rep_remove(struct auxiliary_device *adev) +{ + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); +} + +static const struct auxiliary_device_id mlx5r_rep_id_table[] = { + { .name = MLX5_ADEV_NAME ".rdma-rep", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table); + +static struct auxiliary_driver mlx5r_rep_driver = { + .name = "rep", + .probe = mlx5r_rep_probe, + .remove = mlx5r_rep_remove, + .id_table = mlx5r_rep_id_table, +}; + +int mlx5r_rep_init(void) +{ + return auxiliary_driver_register(&mlx5r_rep_driver); +} + +void mlx5r_rep_cleanup(void) +{ + auxiliary_driver_unregister(&mlx5r_rep_driver); +} diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 5b30d3fa8f8d..ce1dcb105dbd 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -12,47 +12,16 @@ extern const struct mlx5_ib_profile raw_eth_profile; #ifdef CONFIG_MLX5_ESWITCH -u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); -struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - u16 vport_num); -struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - u16 vport_num); -void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); -void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); +int mlx5r_rep_init(void); +void mlx5r_rep_cleanup(void); struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) -{ - return MLX5_ESWITCH_NONE; -} - -static inline -struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return NULL; -} - -static inline -struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) -{ - return NULL; -} - -static inline -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return NULL; -} - -static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} -static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} +static inline int mlx5r_rep_init(void) { return 0; } +static inline void mlx5r_rep_cleanup(void) {} static inline struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, @@ -68,10 +37,4 @@ struct net_device 
*mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, return NULL; } #endif - -static inline -struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) -{ - return rep->rep_data[REP_IB].priv; -} #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d60d63221b14..aabdc07e4753 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -326,8 +320,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, + u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -384,7 +378,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { @@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } @@ -436,7 +442,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width, bool ext) { return ext ? 
@@ -546,7 +552,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { - enum ib_gid_type gid_type = IB_GID_TYPE_IB; + enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; @@ -561,7 +567,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } switch (gid_type) { - case IB_GID_TYPE_IB: + case IB_GID_TYPE_ROCE: roce_version = MLX5_ROCE_VERSION_1; break; case IB_GID_TYPE_ROCE_UDP_ENCAP: @@ -840,7 +846,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, /* We support 'Gappy' memory registration too */ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; } - props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + /* IB_WR_REG_MR always requires changing the entity size with UMR */ + if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ @@ -1175,32 +1183,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, return 0; } -enum mlx5_ib_width { - MLX5_IB_WIDTH_1X = 1 << 0, - MLX5_IB_WIDTH_2X = 1 << 1, - MLX5_IB_WIDTH_4X = 1 << 2, - MLX5_IB_WIDTH_8X = 1 << 3, - MLX5_IB_WIDTH_12X = 1 << 4 -}; - -static void translate_active_width(struct ib_device *ibdev, u8 active_width, - u8 *ib_width) +static void translate_active_width(struct ib_device *ibdev, u16 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - if (active_width & MLX5_IB_WIDTH_1X) + if (active_width & MLX5_PTYS_WIDTH_1X) *ib_width = IB_WIDTH_1X; - else if (active_width & MLX5_IB_WIDTH_2X) + else if (active_width & MLX5_PTYS_WIDTH_2X) *ib_width = IB_WIDTH_2X; - else if (active_width & MLX5_IB_WIDTH_4X) + else if (active_width & MLX5_PTYS_WIDTH_4X) *ib_width = IB_WIDTH_4X; - else if (active_width & MLX5_IB_WIDTH_8X) + else if (active_width & MLX5_PTYS_WIDTH_8X) *ib_width = IB_WIDTH_8X; - else if (active_width & MLX5_IB_WIDTH_12X) + else if (active_width & MLX5_PTYS_WIDTH_12X) *ib_width = IB_WIDTH_12X; else { mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", - (int)active_width); + active_width); *ib_width = IB_WIDTH_4X; } @@ -1277,7 +1277,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, u16 max_mtu; u16 oper_mtu; int err; - u8 ib_link_width_oper; + u16 ib_link_width_oper; u8 vl_hw_cap; rep = kzalloc(sizeof(*rep), GFP_KERNEL); @@ -1310,16 +1310,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) props->port_cap_flags2 = rep->cap_mask2; - err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, + &props->active_speed, port); if (err) goto out; translate_active_width(ibdev, ib_link_width_oper, &props->active_width); - err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); - if (err) - goto out; - mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); @@ -2354,7 +2351,9 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, return -EPERM; if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) || + 
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2))) return -EOPNOTSUPP; break; } @@ -2569,12 +2568,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); + return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) @@ -2635,7 +2634,7 @@ static ssize_t fw_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -2645,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -2655,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2665,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2675,8 +2674,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -2699,9 +2698,7 @@ static void pkey_change_handler(struct work_struct *work) container_of(work, struct mlx5_ib_port_resources, pkey_change_work); - mutex_lock(&ports->devr->mutex); mlx5_ib_gsi_pkey_change(ports->gsi); - mutex_unlock(&ports->devr->mutex); } static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) @@ -3127,11 +3124,9 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s1->usecnt, 0); - for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) INIT_WORK(&devr->ports[port].pkey_change_work, pkey_change_handler); - devr->ports[port].devr = devr; - } return 0; @@ -3959,7 +3954,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { @@ -4033,6 +4028,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4098,6 +4094,8 @@ static const struct ib_device_ops 
mlx5_ib_dev_sriov_ops = { static const struct ib_device_ops mlx5_ib_dev_mw_ops = { .alloc_mw = mlx5_ib_alloc_mw, .dealloc_mw = mlx5_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw), }; static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { @@ -4148,42 +4146,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, @@ -4194,19 +4156,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & @@ -4268,6 +4222,9 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .destroy_wq = mlx5_ib_destroy_wq, .get_netdev = mlx5_ib_get_netdev, .modify_wq = mlx5_ib_modify_wq, + + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table, + ib_rwq_ind_tbl), }; static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) @@ -4282,12 +4239,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, 
&mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -4366,7 +4317,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } @@ -4386,7 +4337,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name); + return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev); } static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) @@ -4597,8 +4548,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device(&dev->ib_dev); } -void *__mlx5_ib_add(struct mlx5_ib_dev *dev, - const struct mlx5_ib_profile *profile) +int __mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile) { int err; int i; @@ -4614,13 +4565,16 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } dev->ib_active = true; - - return dev; + return 0; err_out: - __mlx5_ib_remove(dev, profile, i); - - return NULL; + /* Clean up stages which were initialized */ + while (i) { + i--; + if (profile->stage[i].cleanup) + profile->stage[i].cleanup(dev); + } + return -ENOMEM; } static const struct mlx5_ib_profile pf_profile = { @@ -4743,8 +4697,11 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) +static int mlx5r_mp_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) { + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; bool bound = false; @@ -4752,15 +4709,14 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); if (!mpi) - return NULL; + return -ENOMEM; mpi->mdev = mdev; - err = mlx5_query_nic_vport_system_image_guid(mdev, &mpi->sys_image_guid); if (err) { kfree(mpi); - return NULL; + return err; } mutex_lock(&mlx5_ib_multiport_mutex); @@ -4781,40 +4737,46 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) } mutex_unlock(&mlx5_ib_multiport_mutex); - return mpi; + dev_set_drvdata(&adev->dev, mpi); + return 0; +} + +static void mlx5r_mp_remove(struct auxiliary_device *adev) +{ + struct mlx5_ib_multiport_info *mpi; + + mpi = dev_get_drvdata(&adev->dev); + mutex_lock(&mlx5_ib_multiport_mutex); + if (mpi->ibdev) + mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); + list_del(&mpi->list); + mutex_unlock(&mlx5_ib_multiport_mutex); + kfree(mpi); } -static void *mlx5_ib_add(struct mlx5_core_dev *mdev) +static int mlx5r_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) { + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; const struct mlx5_ib_profile *profile; + int port_type_cap, num_ports, ret; enum rdma_link_layer ll; struct mlx5_ib_dev *dev; - int port_type_cap; - int num_ports; - - if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { - if (!mlx5_core_mp_enabled(mdev)) - mlx5_ib_register_vport_reps(mdev); - return mdev; - } port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) - return mlx5_ib_add_slave_port(mdev); - num_ports = max(MLX5_CAP_GEN(mdev, 
num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) - return NULL; + return -ENOMEM; dev->port = kcalloc(num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) { ib_dealloc_device(&dev->ib_dev); - return NULL; + return -ENOMEM; } dev->mdev = mdev; @@ -4825,80 +4787,96 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) else profile = &pf_profile; - return __mlx5_ib_add(dev, profile); + ret = __mlx5_ib_add(dev, profile); + if (ret) { + kfree(dev->port); + ib_dealloc_device(&dev->ib_dev); + return ret; + } + + dev_set_drvdata(&adev->dev, dev); + return 0; } -static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) +static void mlx5r_remove(struct auxiliary_device *adev) { - struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; - if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) { - mlx5_ib_unregister_vport_reps(mdev); - return; - } - - if (mlx5_core_is_mp_slave(mdev)) { - mpi = context; - mutex_lock(&mlx5_ib_multiport_mutex); - if (mpi->ibdev) - mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); - list_del(&mpi->list); - mutex_unlock(&mlx5_ib_multiport_mutex); - kfree(mpi); - return; - } - - dev = context; + dev = dev_get_drvdata(&adev->dev); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } -static struct mlx5_interface mlx5_ib_interface = { - .add = mlx5_ib_add, - .remove = mlx5_ib_remove, - .protocol = MLX5_INTERFACE_PROTOCOL_IB, +static const struct auxiliary_device_id mlx5r_mp_id_table[] = { + { .name = MLX5_ADEV_NAME ".multiport", }, + {}, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} +static const struct auxiliary_device_id mlx5r_id_table[] = { + { .name = MLX5_ADEV_NAME ".rdma", }, + {}, +}; -void mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} +MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table); +MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table); + +static struct auxiliary_driver mlx5r_mp_driver = { + .name = "multiport", + .probe = mlx5r_mp_probe, + .remove = mlx5r_mp_remove, + .id_table = mlx5r_mp_id_table, +}; + +static struct auxiliary_driver mlx5r_driver = { + .name = "rdma", + .probe = mlx5r_probe, + .remove = mlx5r_remove, + .id_table = mlx5r_id_table, +}; static int __init mlx5_ib_init(void) { - int err; + int ret; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } mlx5_ib_odp_init(); + ret = mlx5r_rep_init(); + if (ret) + goto rep_err; + ret = auxiliary_driver_register(&mlx5r_mp_driver); + if (ret) + goto mp_err; + ret = auxiliary_driver_register(&mlx5r_driver); + if (ret) + goto drv_err; + return 0; - err = mlx5_register_interface(&mlx5_ib_interface); - - return err; +drv_err: + auxiliary_driver_unregister(&mlx5r_mp_driver); +mp_err: + mlx5r_rep_cleanup(); +rep_err: + destroy_workqueue(mlx5_ib_event_wq); + free_page((unsigned long)xlt_emergency_page); + return ret; } static void __exit mlx5_ib_cleanup(void) { - mlx5_unregister_interface(&mlx5_ib_interface); + auxiliary_driver_unregister(&mlx5r_driver); + auxiliary_driver_unregister(&mlx5r_mp_driver); + mlx5r_rep_cleanup(); + destroy_workqueue(mlx5_ib_event_wq); - 
mutex_destroy(&xlt_emergency_page_mutex); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index c19ec9fd8a63..844545064c9e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -36,161 +36,65 @@ #include "mlx5_ib.h" #include <linux/jiffies.h> -/* @umem: umem object to scan - * @addr: ib virtual address requested by the user - * @max_page_shift: high limit for page_shift - 0 means no limit - * @count: number of PAGE_SIZE pages covered by umem - * @shift: page shift for the compound pages found in the region - * @ncont: number of compund pages - * @order: log2 of the number of compound pages +/* + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be + * filled in the pas array. */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order) +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags) { - unsigned long tmp; - unsigned long m; - u64 base = ~0, p = 0; - u64 len, pfn; - int i = 0; - struct scatterlist *sg; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - - if (order) - *order = ilog2(roundup_pow_of_two(i) >> m); - - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; + struct ib_block_iter biter; - if (order) - *order = 0; - - *ncont = 0; + rdma_umem_for_each_dma_block (umem, &biter, page_size) { + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) | + access_flags); + pas++; } - *shift = PAGE_SHIFT + m; - *count = i; } /* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. The granulatity of the page offset scales according to page + * size. 
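+ * For example (illustrative numbers only): with a 4KiB page_size and a scale
+ * of 64, the offset quantum is 4096 / 64 = 64 bytes, so a umem DMA offset of
+ * 320 bytes is encoded as page_offset_quantized = 5 and the device
+ * reconstructs 5 * (4096 / 64) = 320.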
*/ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) { - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } + const u64 page_offset_mask = (1UL << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); } -} -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags) -{ - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_pages(umem), pas, - access_flags); -} -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) -{ - u64 page_size; - u64 page_mask; - u64 off_size; - u64 off_mask; - u64 buf_off; - - page_size = (u64)1 << page_shift; - page_mask = page_size - 1; - buf_off = addr & page_mask; - off_size = page_size >> 6; - off_mask = off_size - 1; - - if (buf_off & off_mask) - return -EINVAL; - - *offset = buf_off >> ilog2(off_size); - return 0; + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset. 
+ */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; } #define WR_ID_BF 0xBF diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5287fc868662..b0fdc1b08e06 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -40,7 +40,73 @@ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) -#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) +static __always_inline unsigned long +__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, + unsigned int pgsz_shift) +{ + unsigned int largest_pg_shift = + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, + BITS_PER_LONG - 1); + + /* + * Despite a command allowing it, the device does not support lower than + * 4k page size. + */ + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); + return GENMASK(largest_pg_shift, pgsz_shift); +} + +/* + * For mkc users, instead of a page_offset the command has a start_iova which + * specifies both the page_offset and the on-the-wire IOVA + */ +#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ + ib_umem_find_best_pgsz(umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), \ + pgsz_shift), \ + iova) + +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisifies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisify the above. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + +#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ + page_offset_quantized) enum { MLX5_IB_MMAP_OFFSET_START = 9, @@ -384,6 +450,22 @@ struct mlx5_ib_dct { u32 *in; }; +struct mlx5_ib_gsi_qp { + struct ib_qp *rx_qp; + u8 port_num; + struct ib_qp_cap cap; + struct ib_cq *cq; + struct mlx5_ib_gsi_wr *outstanding_wrs; + u32 outstanding_pi, outstanding_ci; + int num_qps; + /* Protects access to the tx_qps. Post send operations synchronize + * with tx_qp creation in setup_qp(). Also protects the + * outstanding_wrs array and indices. 
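+	 * Both mlx5_ib_gsi_post_send() and the send-CQ completion handler
+	 * (handle_single_completion() in gsi.c) take this lock.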
+ */ + spinlock_t lock; + struct ib_qp **tx_qps; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; union { @@ -391,6 +473,7 @@ struct mlx5_ib_qp { struct mlx5_ib_raw_packet_qp raw_packet_qp; struct mlx5_ib_rss_qp rss_qp; struct mlx5_ib_dct dct; + struct mlx5_ib_gsi_qp gsi; }; struct mlx5_frag_buf buf; @@ -580,14 +663,12 @@ struct mlx5_ib_mr { int max_descs; int desc_size; int access_mode; + unsigned int page_shift; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - unsigned int order; struct mlx5_cache_ent *cache_ent; - int npages; - struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; void *descs_alloc; @@ -693,10 +774,7 @@ struct mlx5_mr_cache { unsigned long last_add; }; -struct mlx5_ib_gsi_qp; - struct mlx5_ib_port_resources { - struct mlx5_ib_resources *devr; struct mlx5_ib_gsi_qp *gsi; struct work_struct pkey_change_work; }; @@ -1028,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr) +{ + return to_mdev(mr->ibmr.device); +} + static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) { struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( @@ -1119,13 +1202,16 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); @@ -1148,7 +1234,7 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1163,8 +1249,7 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct ib_sge *sg_list, u32 num_sge, struct uverbs_attr_bundle *attrs); -struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); @@ -1173,9 +1258,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr 
*mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int access_flags, - struct ib_pd *pd, struct ib_udata *udata); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -1193,8 +1278,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in, struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); @@ -1214,22 +1298,15 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags); +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry); + unsigned int entry, int access_flags); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); @@ -1238,12 +1315,12 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); -struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); +int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, @@ -1267,6 +1344,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr 
*mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1288,6 +1366,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1296,8 +1378,8 @@ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage); -void *__mlx5_ib_add(struct mlx5_ib_dev *dev, - const struct mlx5_ib_profile *profile); +int __mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile); int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); @@ -1318,9 +1400,9 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); /* GSI QP helper functions */ -struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr); -int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); +int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, + struct ib_qp_init_attr *attr); +int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp); int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask); int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, @@ -1358,7 +1440,7 @@ static inline void init_query_mad(struct ib_smp *mad) static inline int is_qp1(enum ib_qp_type qp_type) { - return qp_type == MLX5_IB_QPT_HW_GSI; + return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI; } #define MLX5_MAX_UMR_SHIFT 16 @@ -1435,32 +1517,60 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -unsigned long mlx5_ib_get_xlt_emergency_page(void); -void mlx5_ib_put_xlt_emergency_page(void); +extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, - bool do_modify_atomic, int access_flags) +static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev, + size_t length) { + /* + * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is + * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka + * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey + * can never be enabled without this capability. Simplify this weird + * quirky hardware by just saying it can't use PAS lists with UMR at + * all. + */ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return false; - if (do_modify_atomic && + /* + * length is the size of the MR in bytes when mlx5_ib_update_xlt() is + * used. + */ + if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) + return false; + return true; +} + +/* + * true if an existing MR can be reconfigured to new access_flags using UMR. + * Older HW cannot use UMR to update certain elements of the MKC. 
See + * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask() + */ +static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && MLX5_CAP_GEN(dev->mdev, atomic) && MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING && + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; return true; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3e6f2f9c6655..24f8d59a42ea 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -41,6 +41,13 @@ #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + enum { MAX_PENDING_REG_MR = 8, }; @@ -49,6 +56,32 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate); + +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} static void assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, @@ -100,18 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) -{ - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= - length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); -} - static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_ib_dev *dev = mr->dev; struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; if (status) { @@ -148,16 +175,14 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return NULL; - mr->order = ent->order; mr->cache_ent = ent; - 
mr->dev = ent->dev; + set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); MLX5_SET(mkc, mkc, log_page_size, ent->page); return mr; @@ -534,7 +559,7 @@ static void cache_work_func(struct work_struct *work) /* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry) + unsigned int entry, int access_flags) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; @@ -544,6 +569,10 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); + /* Matches access in alloc_cache_mr() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + return ERR_PTR(-EOPNOTSUPP); + ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { @@ -558,6 +587,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); } + mr->access_flags = access_flags; return mr; } @@ -613,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); + kfree(mr); return; } @@ -730,8 +761,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && - mlx5_core_is_pf(dev->mdev)) + !dev->is_rep && mlx5_core_is_pf(dev->mdev) && + mlx5_ib_can_load_pas_with_umr(dev, 0)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; @@ -774,29 +805,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } -static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, - struct ib_pd *pd) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - !!(acc & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - !!(acc & IB_ACCESS_RELAXED_ORDERING)); - - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); -} - struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -861,57 +869,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, - int access_flags, struct ib_umem **umem, int *npages, - int *page_shift, int *ncont, int *order) -{ - struct ib_umem *u; - - *umem = NULL; - - if (access_flags & IB_ACCESS_ON_DEMAND) { - struct ib_umem_odp *odp; - - odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, - &mlx5_mn_ops); - if (IS_ERR(odp)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", - PTR_ERR(odp)); - return PTR_ERR(odp); - } - - u = &odp->umem; - - 
*page_shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *npages = *ncont << (*page_shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - } else { - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); - return PTR_ERR(u); - } - - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); - } - - if (!*npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(u); - return -EINVAL; - } - - *umem = u; - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - *npages, *ncont, *order, *page_shift); - - return 0; -} - static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -968,20 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr * -alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, - u64 len, int npages, int page_shift, unsigned int order, - int access_flags) +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + u64 length, int access_flags) +{ + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->ibmr.device = &dev->ib_dev; + mr->access_flags = access_flags; +} + +static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); + struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + unsigned int page_size; + + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + ent = mr_cache_ent_from_order( + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); + /* + * Matches access in alloc_cache_mr(). If the MR can't come from the + * cache then synchronously create an uncached one. + */ + if (!ent || ent->limit == 0 || + !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + mutex_lock(&dev->slow_path_mutex); + mr = reg_create(pd, umem, iova, access_flags, page_size, false); + mutex_unlock(&dev->slow_path_mutex); + return mr; + } - if (!ent) - return ERR_PTR(-E2BIG); mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); + /* + * The above already tried to do the same stuff as reg_create(), + * no reason to try it again. + */ if (IS_ERR(mr)) return mr; } @@ -990,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, mr->umem = umem; mr->access_flags = access_flags; mr->desc_size = sizeof(struct mlx5_mtt); - mr->mmkey.iova = virt_addr; - mr->mmkey.size = len; + mr->mmkey.iova = iova; + mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; + mr->page_shift = order_base_2(page_size); + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); return mr; } @@ -1001,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. 
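The allocation strategy implemented just below in mlx5_ib_alloc_xlt() degrades gracefully: try one large chunk, then a smaller spare chunk, then a single page, and only then fall back to a pre-allocated emergency page serialized by a mutex. A minimal user-space sketch of the same descending-size idea, where calloc and a static buffer stand in for __get_free_pages() and the driver's xlt_emergency_page and the sizes are purely illustrative:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define CHUNK_MAX   (1024 * 1024) /* stand-in for MLX5_MAX_UMR_CHUNK */
#define CHUNK_SPARE (64 * 1024)   /* stand-in for MLX5_SPARE_UMR_CHUNK */
#define CHUNK_MIN   4096          /* one page */

static char emergency_buf[CHUNK_MIN];
static pthread_mutex_t emergency_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns a zeroed staging buffer and its size; never fails. */
void *alloc_staging(size_t *size)
{
	const size_t tries[] = { CHUNK_MAX, CHUNK_SPARE, CHUNK_MIN };

	for (int i = 0; i < 3; i++) {
		void *buf = calloc(1, tries[i]);

		if (buf) {
			*size = tries[i];
			return buf;
		}
	}
	/* Last resort: one shared static buffer, guarded by a lock. */
	pthread_mutex_lock(&emergency_lock);
	memset(emergency_buf, 0, sizeof(emergency_buf));
	*size = sizeof(emergency_buf);
	return emergency_buf;
}

void free_staging(void *buf)
{
	if (buf == emergency_buf) {
		pthread_mutex_unlock(&emergency_lock);
		return;
	}
	free(buf);
}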
+ */ +static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. + */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = get_order(size) / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5_ib_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +/* + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for + * submission. + */ +static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, + struct mlx5_umr_wr *wr, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? 
GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5_ib_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + memset(wr, 0, sizeof(*wr)); + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr->wr.sg_list = sg; + wr->wr.num_sge = 1; + wr->wr.opcode = MLX5_IB_WR_UMR; + wr->pd = mr->ibmr.pd; + wr->mkey = mr->mmkey.key; + wr->length = mr->mmkey.size; + wr->virt_addr = mr->mmkey.iova; + wr->access_flags = mr->access_flags; + wr->page_shift = mr->page_shift; + wr->xlt_size = sg->length; + return xlt; +} + +static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5_ib_free_xlt(xlt, sg->length); +} + +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { - struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; - int size; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; void *xlt; - dma_addr_t dma; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; @@ -1019,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, const int page_mask = page_align - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; - size_t pages_iter = 0; + size_t pages_iter; size_t size_to_map = 0; - gfp_t gfp; - bool use_emergency_page = false; + size_t orig_sg_length; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1035,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, npages += idx & page_mask; idx &= ~page_mask; } - - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; - gfp |= __GFP_ZERO | __GFP_NOWARN; - pages_to_map = ALIGN(npages, page_align); - size = desc_size * pages_to_map; - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); - xlt = (void *)__get_free_pages(gfp, get_order(size)); - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. 
fallback to spare UMR allocation od %d bytes\n", - size, get_order(size), MLX5_SPARE_UMR_CHUNK); - - size = MLX5_SPARE_UMR_CHUNK; - xlt = (void *)__get_free_pages(gfp, get_order(size)); - } - - if (!xlt) { - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); - size = PAGE_SIZE; - memset(xlt, 0, size); - use_emergency_page = true; - } - pages_iter = size / desc_size; - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - err = -ENOMEM; - goto free_xlt; - } + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } - sg.addr = dma; - sg.lkey = dev->umrc.pd->local_dma_lkey; - - memset(&wr, 0, sizeof(wr)); - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - - wr.pd = mr->ibmr.pd; - wr.mkey = mr->mmkey.key; - wr.length = mr->mmkey.size; - wr.virt_addr = mr->mmkey.iova; - wr.access_flags = mr->access_flags; wr.page_shift = page_shift; for (pages_mapped = 0; @@ -1099,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. 
- */ - memset(xlt + size_to_map, 0, size - size_to_map); - } - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; err = mlx5_ib_post_send_wait(dev, &wr); } - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); + return err; +} -free_xlt: - if (use_emergency_page) - mlx5_ib_put_xlt_emergency_page(); - else - free_pages((unsigned long)xlt, get_order(size)); +/* + * Send the DMA list to the HW for a normal MR using UMR. + */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } + + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); return err; } @@ -1150,11 +1266,9 @@ free_xlt: * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. 
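The new mlx5_ib_update_mr_pas() above streams the MR's DMA block addresses through a fixed-size staging buffer, posting a UMR work request whenever the buffer fills and once more for the (zero-padded) tail. A simplified, self-contained sketch of that fill-and-flush pattern, with a plain array instead of the umem block iterator and flush() as a hypothetical stand-in for syncing the buffer and posting the WQE:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define STAGE_ENTRIES 4 /* deliberately tiny so several flushes happen */

static void flush(const uint64_t *stage, size_t n, size_t offset)
{
	/* Stand-in for dma_sync_single_for_device() + posting the UMR WR. */
	printf("flush %zu entries at offset %zu\n", n, offset);
}

static void update_pas(const uint64_t *blocks, size_t nblocks)
{
	uint64_t stage[STAGE_ENTRIES];
	size_t fill = 0, offset = 0;

	for (size_t i = 0; i < nblocks; i++) {
		if (fill == STAGE_ENTRIES) {
			flush(stage, fill, offset);
			offset += fill;
			fill = 0;
		}
		stage[fill++] = blocks[i] | 1; /* mimics MLX5_IB_MTT_PRESENT */
	}
	/* Final, possibly partial chunk; the driver also zero-pads it. */
	flush(stage, fill, offset);
}

int main(void)
{
	uint64_t blocks[10];

	for (size_t i = 0; i < 10; i++)
		blocks[i] = 0x1000 * (i + 1);
	update_pas(blocks, 10);
	return 0;
}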
*/ -static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - u64 virt_addr, u64 length, - struct ib_umem *umem, int npages, - int page_shift, int access_flags, - bool populate) +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1165,57 +1279,54 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); + if (!page_size) + return ERR_PTR(-EINVAL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->ibmr.pd = pd; mr->access_flags = access_flags; + mr->page_shift = order_base_2(page_size); inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(npages, 2); + inlen += sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) - mlx5_ib_populate_pas(dev, umem, page_shift, pas, + if (populate) { + if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) { + err = -EINVAL; + goto err_2; + } + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); + } /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, + populate ? pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, start_addr, virt_addr); - MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, page_shift)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); - MLX5_SET(mkc, mkc, qpn, 0xffffff); + get_octo_len(iova, umem->length, mr->page_shift)); + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, mr->page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); @@ -1225,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->dev = dev; + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); mlx5_ib_dbg(dev, "mkey = 
0x%x\n", mr->mmkey.key); @@ -1234,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err_2: kvfree(in); - err_1: - if (!ibmr) - kfree(mr); - + kfree(mr); return ERR_PTR(err); } -static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) -{ - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->ibmr.length = length; - mr->access_flags = access_flags; -} - static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, u64 length, int acc, int mode) { @@ -1286,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); return &mr->ibmr; @@ -1308,7 +1405,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct uverbs_attr_bundle *attrs) { if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && - advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) return -EOPNOTSUPP; return mlx5_ib_advise_mr_prefetch(pd, advice, flags, @@ -1347,32 +1445,65 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, attr->access_flags, mode); } -struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; - bool use_umr; - struct ib_umem *umem; - int page_shift; - int npages; - int ncont; - int order; + bool xlt_with_umr; int err; - if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EOPNOTSUPP); + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); + if (xlt_with_umr) { + mr = alloc_cacheable_mr(pd, umem, iova, access_flags); + } else { + unsigned int page_size = mlx5_umem_find_best_pgsz( + umem, mkc, log_page_size, 0, iova); - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); + mutex_lock(&dev->slow_path_mutex); + mr = reg_create(pd, umem, iova, access_flags, page_size, true); + mutex_unlock(&dev->slow_path_mutex); + } + if (IS_ERR(mr)) { + ib_umem_release(umem); + return ERR_CAST(mr); + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); + + atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); + + if (xlt_with_umr) { + /* + * If the MR was created with reg_create then it will be + * configured properly but left disabled. It is safe to go ahead + * and configure it again via UMR while enabling it. 
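The non-UMR path above sizes the mkey with mlx5_umem_find_best_pgsz(), which picks the largest supported page size that can tile the user buffer given its IOVA alignment. A deliberately simplified stand-alone illustration of the underlying bit trick follows; the in-kernel helper additionally walks every DMA segment and requires the IOVA offset to match the physical offset, and the values here are hypothetical:

#include <stdint.h>
#include <stdio.h>

/*
 * Simplification: largest power-of-two size (capped at max_pgsz) that both
 * the IOVA and the length are aligned to. Uses the GCC/Clang ctz builtin.
 */
static uint64_t best_page_size(uint64_t iova, uint64_t length, uint64_t max_pgsz)
{
	uint64_t bits = iova | length;
	uint64_t pgsz = bits ? (1ULL << __builtin_ctzll(bits)) : max_pgsz;

	if (pgsz > max_pgsz)
		pgsz = max_pgsz;
	return pgsz >= 4096 ? pgsz : 0; /* below 4K the region can't be mapped */
}

int main(void)
{
	/* 2M-aligned 6M buffer -> 2M pages; shifting the IOVA by 4K drops it to 4K. */
	printf("0x%llx\n", (unsigned long long)best_page_size(0x200000, 0x600000, 1ULL << 21));
	printf("0x%llx\n", (unsigned long long)best_page_size(0x201000, 0x600000, 1ULL << 21));
	return 0;
}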
+ */ + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } + } + return &mr->ibmr; +} + +static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; + int err; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && - length == U64_MAX) { - if (virt_addr != start) + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + if (!start && length == U64_MAX) { + if (iova != 0) return ERR_PTR(-EINVAL); - if (!(access_flags & IB_ACCESS_ON_DEMAND) || - !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); @@ -1381,79 +1512,59 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; } - err = mr_umem_get(dev, start, length, access_flags, &umem, - &npages, &page_shift, &ncont, &order); - - if (err < 0) - return ERR_PTR(err); - - use_umr = mlx5_ib_can_use_umr(dev, true, access_flags); - - if (order <= mr_cache_max_order(dev) && use_umr) { - mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, - page_shift, order, access_flags); - if (PTR_ERR(mr) == -EAGAIN) { - mlx5_ib_dbg(dev, "cache empty for order %d\n", order); - mr = NULL; - } - } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { - if (access_flags & IB_ACCESS_ON_DEMAND) { - err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); - goto error; - } - use_umr = false; - } + /* ODP requires xlt update via umr to work. 
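The implicit-ODP branch above (start == 0 and length == U64_MAX) is what an application reaches by registering its whole address space through libibverbs. A minimal consumer-side sketch, assuming a device and rdma-core build with on-demand paging and implicit ODP support:

#include <infiniband/verbs.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_mr *mr;

	if (!list || !list[0])
		return 1;
	ctx = ibv_open_device(list[0]);
	pd = ctx ? ibv_alloc_pd(ctx) : NULL;
	if (!pd)
		return 1;

	/* addr == NULL and length == SIZE_MAX request an implicit ODP MR. */
	mr = ibv_reg_mr(pd, NULL, SIZE_MAX,
			IBV_ACCESS_ON_DEMAND | IBV_ACCESS_LOCAL_WRITE);
	if (!mr)
		perror("ibv_reg_mr");
	else
		ibv_dereg_mr(mr);

	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}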
*/ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); - if (!mr) { - mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !use_umr); - mutex_unlock(&dev->slow_path_mutex); - } + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, + &mlx5_mn_ops); + if (IS_ERR(odp)) + return ERR_CAST(odp); + mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); if (IS_ERR(mr)) { - err = PTR_ERR(mr); - goto error; + ib_umem_release(&odp->umem); + return ERR_CAST(mr); } - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); - - mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + odp->private = mr; + init_waitqueue_head(&mr->q_deferred_work); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL)); + if (err) + goto err_dereg_mr; - if (use_umr) { - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; + err = mlx5_ib_init_odp_mr(mr); + if (err) + goto err_dereg_mr; + return &mr->ibmr; - if (access_flags & IB_ACCESS_ON_DEMAND) - update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; +err_dereg_mr: + dereg_mr(dev, mr); + return ERR_PTR(err); +} - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, - update_xlt_flags); +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem; - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } - } + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); - if (is_odp_mr(mr)) { - to_ib_umem_odp(mr->umem)->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, - GFP_KERNEL)); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } - } + mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, access_flags); - return &mr->ibmr; -error: - ib_umem_release(umem); - return ERR_PTR(err); + if (access_flags & IB_ACCESS_ON_DEMAND) + return create_user_odp_mr(pd, start, length, iova, access_flags, + udata); + umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + return create_real_mr(pd, umem, iova, access_flags); } /** @@ -1468,148 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { struct mlx5_umr_wr umrwr = {}; - if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr->dev->umrc.pd; + umrwr.pd = mr_to_mdev(mr)->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(mr->dev, &umrwr); + return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); } -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, - int access_flags, int flags) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr umrwr = {}; +/* + * True if the change in access flags can be done via UMR, only some access + * flags can be updated. 
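can_use_umr_rereg_access() below, like mlx5_ib_can_reconfig_with_umr() earlier in mlx5_ib.h, works on the XOR of the old and new access flags: only bits that actually change matter, and only a whitelisted subset of bits may change through the UMR fast path. A tiny stand-alone illustration of that pattern; the flag values here are arbitrary, not the kernel's IB_ACCESS_* definitions:

#include <stdbool.h>
#include <stdio.h>

#define ACC_LOCAL_WRITE   (1u << 0)
#define ACC_REMOTE_WRITE  (1u << 1)
#define ACC_REMOTE_READ   (1u << 2)
#define ACC_REMOTE_ATOMIC (1u << 3)

/* Only these bits may differ if the update is to use the fast path. */
#define UMR_CHANGEABLE (ACC_LOCAL_WRITE | ACC_REMOTE_WRITE | ACC_REMOTE_READ)

static bool can_fast_path(unsigned int cur, unsigned int target)
{
	unsigned int diffs = cur ^ target;

	return (diffs & ~UMR_CHANGEABLE) == 0;
}

int main(void)
{
	/* Adding REMOTE_READ only: eligible for the fast path. */
	printf("%d\n", can_fast_path(ACC_LOCAL_WRITE,
				     ACC_LOCAL_WRITE | ACC_REMOTE_READ));
	/* Toggling REMOTE_ATOMIC: the mkey must be rewritten or recreated. */
	printf("%d\n", can_fast_path(ACC_LOCAL_WRITE,
				     ACC_LOCAL_WRITE | ACC_REMOTE_ATOMIC));
	return 0;
}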
+ */ +static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) + return false; + return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, + target_access_flags); +} + +static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_umr_wr umrwr = { + .wr = { + .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, + .opcode = MLX5_IB_WR_UMR, + }, + .mkey = mr->mmkey.key, + .pd = pd, + .access_flags = access_flags, + }; int err; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; - - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.mkey = mr->mmkey.key; - - if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { - umrwr.pd = pd; - umrwr.access_flags = access_flags; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - } - err = mlx5_ib_post_send_wait(dev, &umrwr); + if (err) + return err; - return err; + mr->access_flags = access_flags; + mr->mmkey.pd = to_mpd(pd)->pdn; + return 0; } -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int new_access_flags, - struct ib_pd *new_pd, struct ib_udata *udata) +static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, + struct ib_umem *new_umem, + int new_access_flags, u64 iova, + unsigned long *page_size) { - struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); - struct mlx5_ib_mr *mr = to_mmr(ib_mr); - struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; - int access_flags = flags & IB_MR_REREG_ACCESS ? - new_access_flags : - mr->access_flags; - int page_shift = 0; - int upd_flags = 0; - int npages = 0; - int ncont = 0; - int order = 0; - u64 addr, len; - int err; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); + /* We only track the allocated sizes of MRs from the cache */ + if (!mr->cache_ent) + return false; + if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + return false; - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + *page_size = + mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!*page_size)) + return false; + return (1ULL << mr->cache_ent->order) >= + ib_umem_num_dma_blocks(new_umem, *page_size); +} - if (!mr->umem) - return -EINVAL; +static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags, int flags, struct ib_umem *new_umem, + u64 iova, unsigned long page_size) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; + struct ib_umem *old_umem = mr->umem; + int err; - if (is_odp_mr(mr)) - return -EOPNOTSUPP; + /* + * To keep everything simple the MR is revoked before we start to mess + * with it. This ensures the change is atomic relative to any use of the + MR.
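can_use_umr_rereg_pas() above reuses the existing mkey only when the cache entry it came from is big enough: an entry of order N provides room for 2^N translation entries, so the new umem must not need more DMA blocks than that at the chosen page size. A short worked check; the sizes are made up and the driver's ib_umem_num_dma_blocks() also accounts for the IOVA offset:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool cache_entry_fits(unsigned int order, uint64_t umem_len,
			     uint64_t page_size)
{
	uint64_t blocks = (umem_len + page_size - 1) / page_size;

	return (1ULL << order) >= blocks;
}

int main(void)
{
	/* An order-5 entry holds 32 entries; 64 MiB of 2 MiB pages needs exactly 32. */
	printf("%d\n", cache_entry_fits(5, 64ULL << 20, 2ULL << 20));
	/* 128 MiB would need 64 entries, so it falls back to a full re-register. */
	printf("%d\n", cache_entry_fits(5, 128ULL << 20, 2ULL << 20));
	return 0;
}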
+ */ + err = mlx5_mr_cache_invalidate(mr); + if (err) + return err; - if (flags & IB_MR_REREG_TRANS) { - addr = virt_addr; - len = length; - } else { - addr = mr->umem->address; - len = mr->umem->length; + if (flags & IB_MR_REREG_PD) { + mr->ibmr.pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + upd_flags |= MLX5_IB_UPD_XLT_PD; + } + if (flags & IB_MR_REREG_ACCESS) { + mr->access_flags = access_flags; + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - if (flags != IB_MR_REREG_PD) { + mr->ibmr.length = new_umem->length; + mr->mmkey.iova = iova; + mr->mmkey.size = new_umem->length; + mr->page_shift = order_base_2(page_size); + mr->umem = new_umem; + err = mlx5_ib_update_mr_pas(mr, upd_flags); + if (err) { /* - * Replace umem. This needs to be done whether or not UMR is - * used. + * The MR is revoked at this point so there is no issue to free + * new_umem. */ - flags |= IB_MR_REREG_TRANS; - ib_umem_release(mr->umem); - mr->umem = NULL; - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); - if (err) - goto err; + mr->umem = old_umem; + return err; } - if (!mlx5_ib_can_use_umr(dev, true, access_flags) || - (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { - /* - * UMR can't be used - MKey needs to be replaced. - */ - if (mr->cache_ent) - detach_mr_from_cache(mr); - err = destroy_mkey(dev, mr); - if (err) - goto err; + atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); + ib_umem_release(old_umem); + atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); + return 0; +} - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags, true); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 iova, int new_access_flags, + struct ib_pd *new_pd, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + int err; - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - mr = to_mmr(ib_mr); - goto err; - } - } else { - /* - * Send a UMR WQE - */ - mr->ibmr.pd = pd; - mr->access_flags = access_flags; - mr->mmkey.iova = addr; - mr->mmkey.size = len; - mr->mmkey.pd = to_mpd(pd)->pdn; + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); - if (flags & IB_MR_REREG_TRANS) { - upd_flags = MLX5_IB_UPD_XLT_ADDR; - if (flags & IB_MR_REREG_PD) - upd_flags |= MLX5_IB_UPD_XLT_PD; - if (flags & IB_MR_REREG_ACCESS) - upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - upd_flags); - } else { - err = rereg_umr(pd, mr, access_flags, flags); + mlx5_ib_dbg( + dev, + "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, new_access_flags); + + if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) + return ERR_PTR(-EOPNOTSUPP); + + if (!(flags & IB_MR_REREG_ACCESS)) + new_access_flags = mr->access_flags; + if (!(flags & IB_MR_REREG_PD)) + new_pd = ib_mr->pd; + + if (!(flags & IB_MR_REREG_TRANS)) { + struct ib_umem *umem; + + /* Fast path for PD/access change */ + if (can_use_umr_rereg_access(dev, mr->access_flags, + new_access_flags)) { + err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + if (err) + return ERR_PTR(err); + return NULL; } + /* DM or ODP MR's don't have a umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + /* + * Only one active MR can refer to a umem at one time, revoke + * the old MR before assigning the umem to the new one. 
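From user space these paths are reached through ibv_rereg_mr(): a PD- or access-only change is a candidate for the UMR fast path above, while adding IBV_REREG_MR_CHANGE_TRANSLATION takes the new-umem logic that follows. A hedged libibverbs sketch, assuming the MR, the new PD, and a replacement buffer already exist:

#include <infiniband/verbs.h>
#include <stddef.h>

/* Change only the access rights: candidate for the driver's UMR fast path. */
int relax_mr_access(struct ibv_mr *mr)
{
	return ibv_rereg_mr(mr, IBV_REREG_MR_CHANGE_ACCESS, NULL, NULL, 0,
			    IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
}

/* Move the MR onto a new buffer and PD: takes the new-umem / re-create path. */
int move_mr(struct ibv_mr *mr, struct ibv_pd *pd, void *buf, size_t len)
{
	return ibv_rereg_mr(mr,
			    IBV_REREG_MR_CHANGE_TRANSLATION |
				    IBV_REREG_MR_CHANGE_PD,
			    pd, buf, len,
			    IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
}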
+ */ + err = mlx5_mr_cache_invalidate(mr); if (err) - goto err; - } - - set_mr_fields(dev, mr, npages, len, access_flags); + return ERR_PTR(err); + umem = mr->umem; + mr->umem = NULL; + atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - return 0; + return create_real_mr(new_pd, umem, mr->mmkey.iova, + new_access_flags); + } -err: - ib_umem_release(mr->umem); - mr->umem = NULL; + /* + * DM doesn't have a PAS list so we can't re-use it, odp does but the + * logic around releasing the umem is different + */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + + if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && + can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { + struct ib_umem *new_umem; + unsigned long page_size; + + new_umem = ib_umem_get(&dev->ib_dev, start, length, + new_access_flags); + if (IS_ERR(new_umem)) + return ERR_CAST(new_umem); + + /* Fast path for PAS change */ + if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, + &page_size)) { + err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, + new_umem, iova, page_size); + if (err) { + ib_umem_release(new_umem); + return ERR_PTR(err); + } + return NULL; + } + return create_real_mr(new_pd, new_umem, iova, new_access_flags); + } - clean_mr(dev, mr); - return err; + /* + * Everything else has no state we can preserve, just create a new MR + * from scratch + */ +recreate: + return mlx5_ib_reg_user_mr(new_pd, start, length, iova, + new_access_flags, udata); } static int @@ -1618,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ndescs, int desc_size) { + struct mlx5_ib_dev *dev = to_mdev(device); + struct device *ddev = &dev->mdev->pdev->dev; int size = ndescs * desc_size; int add_size; int ret; @@ -1630,9 +1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, - size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { + mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->desc_map)) { ret = -ENOMEM; goto err; } @@ -1650,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(device->dev.parent, mr->desc_map, - size, DMA_TO_DEVICE); + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } @@ -1682,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int npages = mr->npages; struct ib_umem *umem = mr->umem; /* Stop all DMA */ @@ -1691,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); + if (umem) { + if (!is_odp_mr(mr)) + atomic_sub(ib_umem_num_pages(umem), + &dev->mdev->priv.reg_pages); + ib_umem_release(umem); + } + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); - - ib_umem_release(umem); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -1727,9 +1918,9 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + /* This is only used from the kernel, so setting the PD is OK. 
*/ + set_mkc_access_pd_addr_fields(mkc, 0, 0, pd); MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); @@ -1973,12 +2164,11 @@ struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, max_num_meta_sg); } -struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_dev *dev = to_mdev(ibmw->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mw *mw = NULL; + struct mlx5_ib_mw *mw = to_mmw(ibmw); u32 *in = NULL; void *mkc; int ndescs; @@ -1991,21 +2181,20 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) - return ERR_PTR(err); + return err; if (req.comp_mask || req.reserved1 || req.reserved2) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (udata->inlen > sizeof(req) && !ib_is_udata_cleared(udata, sizeof(req), udata->inlen - sizeof(req))) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); - mw = kzalloc(sizeof(*mw), GFP_KERNEL); in = kzalloc(inlen, GFP_KERNEL); - if (!mw || !in) { + if (!in) { err = -ENOMEM; goto free; } @@ -2014,11 +2203,11 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); - MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); + MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); @@ -2026,17 +2215,15 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, goto free; mw->mmkey.type = MLX5_MKEY_MW; - mw->ibmw.rkey = mw->mmkey.key; + ibmw->rkey = mw->mmkey.key; mw->ndescs = ndescs; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); + resp.response_length = + min(offsetofend(typeof(resp), response_length), udata->outlen); if (resp.response_length) { err = ib_copy_to_udata(udata, &resp, resp.response_length); - if (err) { - mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); - goto free; - } + if (err) + goto free_mkey; } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { @@ -2048,21 +2235,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } kfree(in); - return &mw->ibmw; + return 0; free_mkey: mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: - kfree(mw); kfree(in); - return ERR_PTR(err); + return err; } int mlx5_ib_dealloc_mw(struct ib_mw *mw) { struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); @@ -2073,11 +2258,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) synchronize_srcu(&dev->odp_srcu); } - err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); - if (err) - return err; - kfree(mmw); - return 0; 
+ return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index cfd7efab114e..aa2413b50adc 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -102,7 +102,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, if (flags & MLX5_IB_UPD_XLT_ZAP) { for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } return; @@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * locking around the xarray. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&imr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, pklm->key = cpu_to_be32(mtt->ibmr.lkey); pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } } @@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) mutex_unlock(&odp->umem_mutex); if (!mr->cache_ent) { - mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); WARN_ON(mr->descs); } } @@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) WARN_ON(atomic_read(&mr->num_deferred_work)); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr->dev->odp_srcu); + srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); + srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); mr->parent = NULL; - mlx5_mr_cache_free(mr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); if (atomic_dec_and_test(&imr->num_deferred_work)) wake_up(&imr->q_deferred_work); @@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) goto out_unlock; atomic_inc(&imr->num_deferred_work); - call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); out_unlock: @@ -382,7 +382,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_use_umr(dev, true, 0)) + !mlx5_ib_can_load_pas_with_umr(dev, 0)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); + ret = mr = mlx5_mr_cache_alloc( + mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; - mr->access_flags = imr->access_flags; + mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -517,11 +518,11 @@ static struct mlx5_ib_mr 
*implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_mr: - mlx5_mr_cache_free(imr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: ib_umem_odp_release(odp); return ret; @@ -536,22 +537,26 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; + if (!mlx5_ib_can_load_pas_with_umr(dev, + MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + return ERR_PTR(-EOPNOTSUPP); + umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags); if (IS_ERR(imr)) { err = PTR_ERR(imr); goto out_umem; } imr->ibmr.pd = &pd->ibpd; - imr->access_flags = access_flags; imr->mmkey.iova = 0; imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; + imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); @@ -585,7 +590,7 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - struct mlx5_ib_dev *dev = imr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct list_head destroy_list; struct mlx5_ib_mr *mtt; struct mlx5_ib_mr *tmp; @@ -655,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) { /* Prevent new page faults and prefetch requests from succeeding */ - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr->dev->odp_srcu); + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); @@ -666,15 +671,21 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) +#define MLX5_PF_FLAGS_SNAPSHOT BIT(2) +#define MLX5_PF_FLAGS_ENABLE BIT(3) static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, u32 flags) { int page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - unsigned long current_seq; u64 access_mask; u64 start_idx; + bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT); + u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; page_shift = odp->page_shift; start_idx = (user_va - ib_umem_start(odp)) >> page_shift; @@ -683,30 +694,20 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; - current_seq = mmu_interval_read_begin(&odp->notifier); - - np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, - current_seq); + np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault); if (np < 0) return np; - mutex_lock(&odp->umem_mutex); - if (!mmu_interval_read_retry(&odp->notifier, current_seq)) { - /* - * No need to check whether the MTTs really belong to - * this MR, since ib_umem_odp_map_dma_pages already - * checks this. 
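The new MLX5_PF_FLAGS_SNAPSHOT handling surfaces to applications as prefetch-without-fault advice. Assuming an rdma-core build that exposes IBV_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT (names and flow here are a consumer-side sketch, not driver code), a caller could pre-map whatever is already resident like this:

#include <infiniband/verbs.h>
#include <stdint.h>

/*
 * Ask the driver to snapshot the currently present pages of an ODP MR into
 * the NIC page tables without faulting anything in; pd and mr are assumed
 * to already exist, and a real caller would likely advise smaller ranges.
 */
int prefetch_resident(struct ibv_pd *pd, struct ibv_mr *mr)
{
	struct ibv_sge sge = {
		.addr = (uintptr_t)mr->addr,
		.length = (uint32_t)mr->length,
		.lkey = mr->lkey,
	};

	return ibv_advise_mr(pd, IBV_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT,
			     IBV_ADVISE_MR_FLAG_FLUSH, &sge, 1);
}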
- */ - ret = mlx5_ib_update_xlt(mr, start_idx, np, - page_shift, MLX5_IB_UPD_XLT_ATOMIC); - } else { - ret = -EAGAIN; - } + /* + * No need to check whether the MTTs really belong to this MR, since + * ib_umem_odp_map_dma_and_lock already checks this. + */ + ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags); mutex_unlock(&odp->umem_mutex); if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(mr->dev, + mlx5_ib_err(mr_to_mdev(mr), "Failed to update mkey page tables\n"); goto out; } @@ -796,7 +797,7 @@ out: MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { - mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); return err; } return ret; @@ -816,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -836,6 +837,16 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) +{ + int ret; + + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address, + mr->umem->length, NULL, + MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE); + return ret >= 0 ? 0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -1774,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ WARN_ON(!work->num_sge); - dev = work->frags[0].mr->dev; + dev = mr_to_mdev(work->frags[0].mr); /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { @@ -1862,6 +1873,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) + pf_flags |= MLX5_PF_FLAGS_SNAPSHOT; + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5758dbe64045..0cb7cc642d87 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - unsigned long addr, size_t size, - struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, u32 *offset) -{ - int err; - - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); - if (IS_ERR(*umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - return PTR_ERR(*umem); - } - - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); - - err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", - addr, size, *npages, *page_shift, *ncont, *offset); - - return 0; - -err_umem: - ib_umem_release(*umem); - *umem = NULL; - - return err; -} - static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { @@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct 
mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - int page_shift = 0; - int npages; + unsigned long page_size = 0; u32 offset = 0; - int ncont = 0; int err; if (!ucmd->buf_addr) @@ -849,23 +814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, - &rwq->rq_page_offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &rwq->rq_page_offset); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); + err = -EINVAL; goto err_umem; } - rwq->rq_num_pas = ncont; - rwq->page_shift = page_shift; - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); + rwq->page_shift = order_base_2(page_size); + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", - (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, ncont, offset); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, + offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; - int page_shift = 0; + unsigned int page_offset_quantized = 0; + unsigned long page_size = 0; int uar_index = 0; - int npages; - u32 offset = 0; int bfregn; int ncont = 0; __be64 *pas; @@ -950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd->buf_addr; - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, - ubuffer->buf_size, &ubuffer->umem, - &npages, &page_shift, &ncont, &offset); - if (err) + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(ubuffer->umem)) { + err = PTR_ERR(ubuffer->umem); goto err_bfreg; + } + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, qpc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); } else { ubuffer->umem = NULL; } @@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uid = (attr->qp_type != IB_QPT_XRC_INI) ? 
to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); - qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); - - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(qpc, qpc, page_offset, offset); - + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) { + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); + MLX5_SET(qpc, qpc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); + } MLX5_SET(qpc, qpc, uar_page, uar_index); if (bfregn != MLX5_IB_INVALID_BFREG) resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); @@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, void *wq; int inlen; int err; - int page_shift = 0; - int npages; - int ncont = 0; - u32 offset = 0; - - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, &ncont, - &offset); - if (err) - return err; + unsigned int page_offset_quantized; + unsigned long page_size; + + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(sq->ubuffer.umem)) + return PTR_ERR(sq->ubuffer.umem); + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(wq, wq, page_offset, offset); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); @@ -1278,40 +1261,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static size_t get_rq_pas_size(void *qpc) -{ - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; - - return rq_num_pas * sizeof(u64); -} - static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - size_t qpinlen, struct ib_pd *pd) + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; - __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = 
MLX5_ADDR_OF(create_qp_in, qpin, qpc); - size_t rq_pas_size = get_rq_pas_size(qpc); + struct ib_umem *umem = rq->base.ubuffer.umem; + unsigned int page_offset_quantized; + unsigned long page_size = 0; size_t inlen; int err; - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, + MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, + &page_offset_quantized); + if (!page_size) return -EINVAL; - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); - memcpy(pas, qp_pas, rq_pas_size); + mlx5_ib_populate_pas(umem, page_size, pas, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); @@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; @@ -1477,7 +1451,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; resp->tirn = rq->tirn; resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) { resp->tir_icm_addr = MLX5_GET( create_tir_out, out, icm_address_31_0); resp->tir_icm_addr |= @@ -1739,7 +1714,8 @@ create_tir: if (mucontext->devx_uid) { params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; params->resp.tirn = qp->rss_qp.tirn; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) { params->resp.tir_icm_addr = MLX5_GET(create_tir_out, out, icm_address_31_0); params->resp.tir_icm_addr |= @@ -2409,6 +2385,9 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = params->uidx; void *dctc; + if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct)) + return -EOPNOTSUPP; + qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); if (!qp->dct.in) return -ENOMEM; @@ -2431,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } @@ -2455,6 +2434,7 @@ static int check_qp_type(struct 
mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case IB_QPT_GSI: if (dev->profile == &raw_eth_profile) goto out; + fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2506,18 +2486,6 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } - switch (attr->qp_type) { - case IB_QPT_SMI: - case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: - case IB_QPT_GSI: - mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n", - attr->qp_type); - return -EINVAL; - default: - break; - } - /* * We don't need to see this warning, it means that kernel code * missing ib_pd. Placed here to catch developer's mistakes. @@ -2719,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); - if (create_flags) + if (create_flags) { mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", create_flags); - - return (create_flags) ? -EINVAL : 0; + return -EOPNOTSUPP; + } + return 0; } static int process_udata_size(struct mlx5_ib_dev *dev, @@ -2780,21 +2749,23 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto out; } - if (qp->type == MLX5_IB_QPT_DCT) { + switch (qp->type) { + case MLX5_IB_QPT_DCT: err = create_dct(dev, pd, qp, params); - goto out; - } - - if (qp->type == IB_QPT_XRC_TGT) { + break; + case IB_QPT_XRC_TGT: err = create_xrc_tgt_qp(dev, qp, params); - goto out; + break; + case IB_QPT_GSI: + err = mlx5_ib_create_gsi(pd, qp, params->attr); + break; + default: + if (params->udata) + err = create_user_qp(dev, pd, qp, params); + else + err = create_kernel_qp(dev, pd, qp, params); } - if (params->udata) - err = create_user_qp(dev, pd, qp, params); - else - err = create_kernel_qp(dev, pd, qp, params); - out: if (err) { mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type); @@ -2934,9 +2905,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, if (err) return ERR_PTR(err); - if (attr->qp_type == IB_QPT_GSI) - return mlx5_ib_gsi_create_qp(pd, attr); - params.udata = udata; params.uidx = MLX5_IB_DEFAULT_UIDX; params.attr = attr; @@ -3005,9 +2973,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, return &qp->ibqp; destroy_qp: - if (qp->type == MLX5_IB_QPT_DCT) { + switch (qp->type) { + case MLX5_IB_QPT_DCT: mlx5_ib_destroy_dct(qp); - } else { + break; + case IB_QPT_GSI: + mlx5_ib_destroy_gsi(qp); + break; + default: /* * These lines below are temp solution till QP allocation * will be moved to be under IB/core responsiblity. 
@@ -3032,7 +3005,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx5_ib_qp *mqp = to_mqp(qp); if (unlikely(qp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_destroy_qp(qp); + return mlx5_ib_destroy_gsi(mqp); if (mqp->type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); @@ -3088,20 +3061,44 @@ enum { MLX5_PATH_FLAG_COUNTER = 1 << 2, }; +static int ib_to_mlx5_rate_map(u8 rate) +{ + switch (rate) { + case IB_RATE_PORT_CURRENT: + return 0; + case IB_RATE_56_GBPS: + return 1; + case IB_RATE_25_GBPS: + return 2; + case IB_RATE_100_GBPS: + return 3; + case IB_RATE_200_GBPS: + return 4; + case IB_RATE_50_GBPS: + return 5; + default: + return rate + MLX5_STAT_RATE_OFFSET; + } + + return 0; +} + static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { + u32 stat_rate_support; + if (rate == IB_RATE_PORT_CURRENT) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; + stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support); while (rate != IB_RATE_PORT_CURRENT && - !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - MLX5_CAP_GEN(dev->mdev, stat_rate_support))) + !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support)) --rate; - return rate ? rate + MLX5_STAT_RATE_OFFSET : rate; + return ib_to_mlx5_rate_map(rate); } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, @@ -3643,14 +3640,12 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, MLX5_MAX_PORTS + 1; } -static bool qp_supports_affinity(struct ib_qp *qp) +static bool qp_supports_affinity(struct mlx5_ib_qp *qp) { - if ((qp->qp_type == IB_QPT_RC) || - (qp->qp_type == IB_QPT_UD) || - (qp->qp_type == IB_QPT_UC) || - (qp->qp_type == IB_QPT_RAW_PACKET) || - (qp->qp_type == IB_QPT_XRC_INI) || - (qp->qp_type == IB_QPT_XRC_TGT)) + if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) || + (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) || + (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) || + (qp->type == MLX5_IB_QPT_DCI)) return true; return false; } @@ -3668,7 +3663,7 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, unsigned int tx_affinity; if (!(mlx5_ib_lag_should_assign_affinity(dev) && - qp_supports_affinity(qp))) + qp_supports_affinity(mqp))) return 0; if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) @@ -4161,7 +4156,11 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, rae, 1); } MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); - MLX5_SET(dctc, dctc, port, attr->port_num); + if (mlx5_lag_is_active(dev->mdev)) + MLX5_SET(dctc, dctc, port, + get_tx_affinity_rr(dev, udata)); + else + MLX5_SET(dctc, dctc, port, attr->port_num); set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); @@ -4224,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + if (ibqp->rwq_ind_tbl) return -ENOSYS; @@ -4553,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, 
&qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); @@ -4716,12 +4720,12 @@ int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); } -void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); + return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) @@ -4859,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, delay_drop_en, 1); } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { err = set_delay_drop(dev); @@ -4921,8 +4925,8 @@ static int prepare_user_rq(struct ib_pd *pd, int err; size_t required_cmd_sz; - required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes) - + sizeof(ucmd.single_stride_log_num_of_bytes); + required_cmd_sz = offsetofend(struct mlx5_ib_create_wq, + single_stride_log_num_of_bytes); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -5006,7 +5010,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (!udata) return ERR_PTR(-ENOSYS); - min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); @@ -5036,8 +5040,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = offsetofend( + struct mlx5_ib_create_wq_resp, response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; @@ -5056,22 +5060,27 @@ err: return ERR_PTR(err); } -void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); + int ret; - mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp); + ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp); + if (ret) + return ret; destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); + return 0; } -struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(device); - struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = + to_mrwq_ind_table(ib_rwq_ind_table); + struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device); int sz = 1 << init_attr->log_ind_tbl_size; struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; size_t min_resp_len; @@ -5084,30 +5093,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, if 
(udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (init_attr->log_ind_tbl_size > MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", init_attr->log_ind_tbl_size, MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); - return ERR_PTR(-EINVAL); + return -EINVAL; } - min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + min_resp_len = + offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved); if (udata->outlen && udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); - - rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); - if (!rwq_ind_tbl) - return ERR_PTR(-ENOMEM); + return -EINVAL; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto err; - } + if (!in) + return -ENOMEM; rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); @@ -5122,26 +5126,24 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); kvfree(in); - if (err) - goto err; + return err; rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; if (udata->outlen) { - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = + offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, + response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; } - return &rwq_ind_tbl->ib_rwq_ind_tbl; + return 0; err_copy: mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); -err: - kfree(rwq_ind_tbl); - return ERR_PTR(err); + return err; } int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) @@ -5149,10 +5151,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); - mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); - - kfree(rwq_ind_tbl); - return 0; + return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); } int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, @@ -5169,7 +5168,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, void *rqc; void *in; - required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved); if (udata->inlen < required_cmd_sz) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index ba899df44c5b..5d4e140db99c 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -26,8 +26,8 @@ int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec); -void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, - struct mlx5_core_qp *rq); +int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *rq); int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 7c3968ef9cd1..c683d7000168 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -576,11 
+576,12 @@ err_destroy_rq: return err; } -void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, - struct mlx5_core_qp *rq) +int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 887270dd3ce2..4ac429e72004 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY, mlx5_mkey_to_idx(mr->mmkey.key)); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 7e10cbcb6d5c..fab6736e4d6a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int npages; - int page_shift; - int ncont; - u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, err = PTR_ERR(srq->umem); return err; } - - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); - if (!in->pas) { - err = -ENOMEM; - goto err_umem; - } - - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); + in->umem = srq->umem; err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); - goto err_in; + goto err_umem; } - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; in->uid = (in->type != IB_SRQT_XRC) ? 
to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) @@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; -err_in: - kvfree(in->pas); - err_umem: ib_umem_release(srq->umem); @@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC && + init_attr->srq_type != IB_SRQT_TM) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", @@ -389,24 +369,21 @@ out_box: return ret; } -void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); + int ret; - mlx5_cmd_destroy_srq(dev, &msrq->msrq); - - if (srq->uobject) { - mlx5_ib_db_unmap_user( - rdma_udata_to_drv_context( - udata, - struct mlx5_ib_ucontext, - ibucontext), - &msrq->db); - ib_umem_release(msrq->umem); - } else { + ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq); + if (ret) + return ret; + + if (udata) + destroy_srq_user(srq->pd, msrq, udata); + else destroy_srq_kernel(dev, msrq); - } + return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index af197c36d757..a7e3dc5564ac 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -28,6 +28,7 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + struct ib_umem *umem; u32 tm_log_list_size; u32 tm_next_tag; u32 tm_hw_phase_cnt; @@ -56,7 +57,7 @@ struct mlx5_srq_table { int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); -void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 37aaacebd3f2..8b3385396599 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) return srq; } +static int __set_srq_page_size(struct mlx5_srq_attr *in, + unsigned long page_size) +{ + if (!page_size) + return -EINVAL; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + + if (WARN_ON(get_pas_size(in) != + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) + return -EINVAL; + return 0; +} + +#define set_srq_page_size(in, typ, log_pgsz_fld) \ + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ + (in)->umem, typ, log_pgsz_fld, \ + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ + 64, &(in)->page_offset)) + static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, srqc, log_page_size); + if (err) + return err; + } + pas_size = 
get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); @@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, xrc_srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, set_srqc(xrc_srqc, in); MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in = NULL; void *rmpc; void *wq; + void *pas; int pas_size; int outlen; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; outlen = MLX5_ST_SZ_BYTES(create_rmp_out); @@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(create_rmp_in, create_in, uid, in->uid); + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); + set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); @@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in; void *xrqc; void *wq; + void *pas; int pas_size; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); if (in->type == IB_SRQT_TM) { MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); @@ -590,22 +662,32 @@ err_destroy_srq_split: return err; } -void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { struct 
mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - tmp = xa_erase_irq(&table->array, srq->srqn); - if (!tmp || tmp != srq) - return; + /* Delete entry, but leave index occupied */ + tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0); + if (WARN_ON(tmp != srq)) + return xa_err(tmp) ?: -EINVAL; err = destroy_srq_split(dev, srq); - if (err) - return; + if (err) { + /* + * We don't need to check returned result for an error, + * because we are storing in pre-allocated space xarray + * entry and it can't fail at this stage. + */ + xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0); + return err; + } + xa_erase_irq(&table->array, srq->srqn); mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); + return 0; } int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 43880973a512..d6038fb6c50c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -398,7 +398,8 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, +static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *seg, const struct ib_send_wr *wr) { const struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -414,10 +415,12 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, seg, lr, 1); - MLX5_SET(mkc, seg, relaxed_ordering_write, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, seg, relaxed_ordering_read, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); if (umrwr->pd) MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); @@ -863,13 +866,11 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; - if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Fast update of %s for MR is disabled\n", - (MLX5_CAP_GEN(dev->mdev, - umr_modify_entity_size_disabled)) ? 
- "entity size" : - "atomic access"); + /* Matches access in mlx5_set_umr_free_mkey() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { + mlx5_ib_warn( + to_mdev(qp->ibqp.device), + "Fast update for MR access flags is not possible\n"); return -EINVAL; } @@ -1263,7 +1264,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_reg_mkey_segment(*seg, wr); + set_reg_mkey_segment(dev, *seg, wr); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index c3cfea243af8..26c3408dcaca 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->opcode = 0xFF; break; } } else { @@ -803,8 +803,10 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_arm; + } cq_context = mailbox->buf; @@ -846,9 +848,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } spin_lock_irq(&dev->cq_table.lock); - if (mthca_array_set(&dev->cq_table.cq, - cq->cqn & (dev->limits.num_cqs - 1), - cq)) { + err = mthca_array_set(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1), cq); + if (err) { spin_unlock_irq(&dev->cq_table.lock); goto err_out_free_mr; } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7550e9d03dec..a445160de3e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -105,7 +105,6 @@ enum { MTHCA_OPCODE_ATOMIC_CS = 0x11, MTHCA_OPCODE_ATOMIC_FA = 0x12, MTHCA_OPCODE_BIND_MW = 0x18, - MTHCA_OPCODE_INVALID = 0xff }; enum { @@ -548,7 +547,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp, + struct mthca_qp *qp, struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9fa2f9164a47..1a3dd07f993b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -373,9 +373,10 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); + return 0; } static int mthca_ah_create(struct ib_ah *ibah, @@ -389,9 +390,10 @@ static int mthca_ah_create(struct ib_ah *ibah, init_attr->ah_attr, ah); } -static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); + return 0; } static int mthca_create_srq(struct ib_srq *ibsrq, @@ -440,7 +442,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq, return 0; } -static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +static int 
mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -454,6 +456,7 @@ static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, @@ -467,7 +470,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, int err; if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); switch (init_attr->qp_type) { case IB_QPT_RC: @@ -532,13 +535,14 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_SMI: case IB_QPT_GSI: { - /* Don't allow userspace to create special QPs */ - if (udata) - return ERR_PTR(-EINVAL); - - qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); + qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); + if (!qp->sqp) { + kfree(qp); + return ERR_PTR(-ENOMEM); + } qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; @@ -547,7 +551,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp), udata); + qp, udata); break; } default: @@ -556,6 +560,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, } if (err) { + kfree(qp->sqp); kfree(qp); return ERR_PTR(err); } @@ -588,7 +593,8 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); - kfree(qp); + kfree(to_mqp(qp)->sqp); + kfree(to_mqp(qp)); return 0; } @@ -606,7 +612,7 @@ static int mthca_create_cq(struct ib_cq *ibcq, udata, struct mthca_ucontext, ibucontext); if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return -EINVAL; @@ -789,7 +795,7 @@ out: return ret; } -static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -808,6 +814,7 @@ static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); + return 0; } static inline u32 convert_access(int acc) @@ -846,7 +853,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct mthca_ucontext *context = rdma_udata_to_drv_context( udata, struct mthca_ucontext, ibucontext); struct mthca_mr *mr; @@ -877,7 +884,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err; } - n = ib_umem_num_pages(mr->umem); + n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE); mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -895,8 +902,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - pages[i++] = sg_page_iter_dma_address(&sg_iter); + rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) { + pages[i++] = rdma_block_iter_dma_address(&biter); /* * Be friendly to write_mtt and pass it chunks @@ -954,29 +961,34 @@ static ssize_t hw_rev_show(struct device 
*device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%x\n", dev->rev_id); + return sysfs_emit(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); -static ssize_t hca_type_show(struct device *device, - struct device_attribute *attr, char *buf) +static const char *hca_type_string(int hca_type) { - struct mthca_dev *dev = - rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - - switch (dev->pdev->device) { + switch (hca_type) { case PCI_DEVICE_ID_MELLANOX_TAVOR: - return sprintf(buf, "MT23108\n"); + return "MT23108"; case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: - return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + return "MT25208 (MT23108 compat mode)"; case PCI_DEVICE_ID_MELLANOX_ARBEL: - return sprintf(buf, "MT25208\n"); + return "MT25208"; case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: - return sprintf(buf, "MT25204\n"); - default: - return sprintf(buf, "unknown\n"); + return "MT25204"; } + + return "unknown"; +} + +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mthca_dev *dev = + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + + return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device)); } static DEVICE_ATTR_RO(hca_type); @@ -986,7 +998,7 @@ static ssize_t board_id_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); + return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -1151,36 +1163,12 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - if (mthca_is_memfree(dev)) ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_srq_ops); @@ -1199,7 +1187,7 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group); - ret = ib_register_device(&dev->ib_dev, "mthca%d"); + ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev); if (ret) return ret; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 84c64bff0d92..8a77483bb33c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ 
b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -240,6 +240,16 @@ struct mthca_wq { __be32 *db; }; +struct mthca_sqp { + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + int header_buf_size; + void *header_buf; + dma_addr_t header_dma; +}; + struct mthca_qp { struct ib_qp ibqp; int refcount; @@ -265,17 +275,7 @@ struct mthca_qp { wait_queue_head_t wait; struct mutex mutex; -}; - -struct mthca_sqp { - struct mthca_qp qp; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - int header_buf_size; - void *header_buf; - dma_addr_t header_dma; + struct mthca_sqp *sqp; }; static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -313,9 +313,4 @@ static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) return container_of(ibqp, struct mthca_qp, ibqp); } -static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) -{ - return container_of(qp, struct mthca_sqp, qp); -} - #endif /* MTHCA_PROVIDER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index c6e95d0d760a..07cfc0934b17 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -809,7 +809,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) - store_attrs(to_msqp(qp), attr, attr_mask); + store_attrs(qp->sqp, attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved @@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, new_state; int err = -EINVAL; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (attr_mask & IB_QP_CUR_STATE) { cur_state = attr->cur_qp_state; @@ -1368,39 +1371,40 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp, + struct mthca_qp *qp, struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; - sqp->qp.transport = MLX; - err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); + qp->transport = MLX; + err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; - sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; - sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, - &sqp->header_dma, GFP_KERNEL); - if (!sqp->header_buf) + qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE; + qp->sqp->header_buf = + dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + &qp->sqp->header_dma, GFP_KERNEL); + if (!qp->sqp->header_buf) return -ENOMEM; spin_lock_irq(&dev->qp_table.lock); if (mthca_array_get(&dev->qp_table.qp, mqpn)) err = -EBUSY; else - mthca_array_set(&dev->qp_table.qp, mqpn, sqp); + mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp); spin_unlock_irq(&dev->qp_table.lock); if (err) goto err_out; - sqp->qp.port = port; - sqp->qp.qpn = mqpn; - sqp->qp.transport = MLX; + qp->port = port; + qp->qpn = mqpn; + qp->transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp, udata); + send_policy, qp, udata); if (err) goto err_out_free; @@ -1421,10 +1425,9 @@ int mthca_alloc_sqp(struct mthca_dev *dev, mthca_unlock_cqs(send_cq, recv_cq); - err_out: - dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, - sqp->header_buf, sqp->header_dma); - +err_out: + dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + qp->sqp->header_buf, qp->sqp->header_dma); return err; } @@ -1487,20 
+1490,19 @@ void mthca_free_qp(struct mthca_dev *dev, if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); - dma_free_coherent(&dev->pdev->dev, - to_msqp(qp)->header_buf_size, - to_msqp(qp)->header_buf, - to_msqp(qp)->header_dma); + dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + qp->sqp->header_buf, qp->sqp->header_dma); } else mthca_free(&dev->qp_table.alloc, qp->qpn); } /* Create UD header for an MLX send and build a data segment for it */ -static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, const struct ib_ud_wr *wr, +static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind, + const struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { + struct mthca_sqp *sqp = qp->sqp; int header_size; int err; u16 pkey; @@ -1513,7 +1515,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); @@ -1534,29 +1536,29 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + sqp->ud_header.lrh.virtual_lane = !qp->ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - if (!sqp->qp.ibqp.qp_num) - ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - sqp->pkey_index, &pkey); + if (!qp->ibqp.qp_num) + ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index, + &pkey); else - ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - wr->pkey_index, &pkey); + ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index, + &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? 
sqp->qkey : wr->remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf + ind * MTHCA_UD_HEADER_SIZE); data->byte_count = cpu_to_be32(header_size); - data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->lkey = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey); data->addr = cpu_to_be64(sqp->header_dma + ind * MTHCA_UD_HEADER_SIZE); @@ -1735,9 +1737,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), - wqe - sizeof (struct mthca_next_seg), - wqe); + err = build_mlx_header( + dev, qp, ind, ud_wr(wr), + wqe - sizeof(struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; @@ -2065,9 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), - wqe - sizeof (struct mthca_next_seg), - wqe); + err = build_mlx_header( + dev, qp, ind, ud_wr(wr), + wqe - sizeof(struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index fcfe0e82197a..5eb61c110090 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -185,7 +185,6 @@ struct ocrdma_hw_mr { u32 num_pbes; u32 pbl_size; u32 pbe_size; - u64 fbo; u64 va; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 6eea02b18968..699a8b719ed6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -215,12 +215,13 @@ av_err: return status; } -void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); ocrdma_free_av(dev, ah); + return 0; } int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 8b73b3489f3a..35cf2e2ff391 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -53,7 +53,7 @@ enum { int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e07bf0b2209a..c51c3f40700e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1962,6 +1962,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, int i; struct ocrdma_reg_nsmr *cmd; struct ocrdma_reg_nsmr_rsp *rsp; + u64 fbo = hwmr->va & (hwmr->pbe_size - 1); cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd)); if (!cmd) @@ -1987,8 +1988,8 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT; cmd->totlen_low = hwmr->len; cmd->totlen_high = upper_32_bits(hwmr->len); - cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff); - cmd->fbo_high = (u32) 
upper_32_bits(hwmr->fbo); + cmd->fbo_low = lower_32_bits(fbo); + cmd->fbo_high = upper_32_bits(fbo); cmd->va_loaddr = (u32) hwmr->va; cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index d8c47d24d6d6..9a834a9cca0e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,7 +119,7 @@ static ssize_t hw_rev_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); + return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -129,7 +129,7 @@ static ssize_t hca_type_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); + return sysfs_emit(buf, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR_RO(hca_type); @@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .create_ah = ocrdma_create_ah, .create_cq = ocrdma_create_cq, .create_qp = ocrdma_create_qp, + .create_user_ah = ocrdma_create_ah, .dealloc_pd = ocrdma_dealloc_pd, .dealloc_ucontext = ocrdma_dealloc_ucontext, .dereg_mr = ocrdma_dereg_mr, @@ -204,32 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = - OCRDMA_UVERBS(GET_CONTEXT) | - OCRDMA_UVERBS(QUERY_DEVICE) | - OCRDMA_UVERBS(QUERY_PORT) | - OCRDMA_UVERBS(ALLOC_PD) | - OCRDMA_UVERBS(DEALLOC_PD) | - OCRDMA_UVERBS(REG_MR) | - OCRDMA_UVERBS(DEREG_MR) | - OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | - OCRDMA_UVERBS(CREATE_CQ) | - OCRDMA_UVERBS(RESIZE_CQ) | - OCRDMA_UVERBS(DESTROY_CQ) | - OCRDMA_UVERBS(REQ_NOTIFY_CQ) | - OCRDMA_UVERBS(CREATE_QP) | - OCRDMA_UVERBS(MODIFY_QP) | - OCRDMA_UVERBS(QUERY_QP) | - OCRDMA_UVERBS(DESTROY_QP) | - OCRDMA_UVERBS(POLL_CQ) | - OCRDMA_UVERBS(POST_SEND) | - OCRDMA_UVERBS(POST_RECV); - - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_AH) | - OCRDMA_UVERBS(MODIFY_AH) | - OCRDMA_UVERBS(QUERY_AH) | - OCRDMA_UVERBS(DESTROY_AH); dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.phys_port_cnt = 1; @@ -240,22 +215,17 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); - if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_SRQ) | - OCRDMA_UVERBS(MODIFY_SRQ) | - OCRDMA_UVERBS(QUERY_SRQ) | - OCRDMA_UVERBS(DESTROY_SRQ) | - OCRDMA_UVERBS(POST_SRQ_RECV); - + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); - } + rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); if (ret) return ret; - return ib_register_device(&dev->ibdev, "ocrdma%d"); + dma_set_max_seg_size(&dev->nic_info.pdev->dev, UINT_MAX); + return ib_register_device(&dev->ibdev, "ocrdma%d", + &dev->nic_info.pdev->dev); } static int ocrdma_alloc_resources(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c1751c9a0f62..3acb5c10b155 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ 
-112,7 +112,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, } static inline void get_link_speed_and_width(struct ocrdma_dev *dev, - u8 *ib_speed, u8 *ib_width) + u16 *ib_speed, u8 *ib_width) { int status; u8 speed; @@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } - kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; _ocrdma_dealloc_pd(dev, pd); + kfree(pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) @@ -664,7 +664,7 @@ exit: return status; } -void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); @@ -682,10 +682,11 @@ void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (is_ucontext_pd(uctx, pd)) { ocrdma_release_ucontext_pd(uctx); - return; + return 0; } } _ocrdma_dealloc_pd(dev, pd); + return 0; } static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, @@ -810,14 +811,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr) return status; } -static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, - u32 num_pbes) +static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr) { struct ocrdma_pbe *pbe; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; - struct ib_umem *umem = mr->umem; - int pbe_cnt, total_num_pbes = 0; + int pbe_cnt; u64 pg_addr; if (!mr->hwmr.num_pbes) @@ -826,19 +825,14 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, pbe = (struct ocrdma_pbe *)pbl_tbl->va; pbe_cnt = 0; - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) { /* store the page address in pbe */ - pg_addr = sg_page_iter_dma_address(&sg_iter); + pg_addr = rdma_block_iter_dma_address(&biter); pbe->pa_lo = cpu_to_le32(pg_addr); pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr)); pbe_cnt += 1; - total_num_pbes += 1; pbe++; - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - /* if the given pbl is full storing the pbes, * move to next pbl. */ @@ -857,7 +851,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_mr *mr; struct ocrdma_pd *pd; - u32 num_pbes; pd = get_ocrdma_pd(ibpd); @@ -872,13 +865,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = -EFAULT; goto umem_err; } - num_pbes = ib_umem_page_count(mr->umem); - status = ocrdma_get_pbl_info(dev, mr, num_pbes); + status = ocrdma_get_pbl_info( + dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE)); if (status) goto umem_err; mr->hwmr.pbe_size = PAGE_SIZE; - mr->hwmr.fbo = ib_umem_offset(mr->umem); mr->hwmr.va = usr_addr; mr->hwmr.len = len; mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 
1 : 0; @@ -889,7 +881,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); if (status) goto umem_err; - build_user_pbes(dev, mr, num_pbes); + build_user_pbes(dev, mr); status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); if (status) goto mbx_err; @@ -982,7 +974,7 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ocrdma_create_cq_ureq ureq; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) @@ -1056,7 +1048,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_eq *eq = NULL; @@ -1081,6 +1073,7 @@ void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } + return 0; } static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) @@ -1306,6 +1299,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; @@ -1398,6 +1394,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ocrdma_dev *dev; enum ib_qp_state old_qps, new_qps; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); @@ -1777,6 +1776,9 @@ int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); + if (init_attr->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (init_attr->attr.max_sge > dev->attr.max_recv_sge) return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) @@ -1857,7 +1859,7 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return status; } -void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct ocrdma_srq *srq; struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); @@ -1872,6 +1874,7 @@ void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); + return 0; } /* unprivileged verbs and their support functions. 
*/ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index df8e3b923a44..425d554e7f3f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -67,12 +67,12 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); struct ib_qp *ocrdma_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, @@ -92,7 +92,7 @@ int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr, int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); -void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_recv_wr); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index d85f992bac29..8e7c069e1a2d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -124,7 +124,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); + return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev); @@ -134,10 +134,9 @@ static ssize_t hca_type_show(struct device *device, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", - dev->pdev->device, - rdma_protocol_iwarp(&dev->ibdev, 1) ? - "iWARP" : "RoCE"); + return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? 
"iWARP" : + "RoCE"); } static DEVICE_ATTR_RO(hca_type); @@ -177,6 +176,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev) } static const struct ib_device_ops qedr_roce_dev_ops = { + .alloc_xrcd = qedr_alloc_xrcd, + .dealloc_xrcd = qedr_dealloc_xrcd, .get_port_immutable = qedr_roce_port_immutable, .query_pkey = qedr_query_pkey, }; @@ -232,6 +233,7 @@ static const struct ib_device_ops qedr_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq), + INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; @@ -242,31 +244,6 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | - QEDR_UVERBS(QUERY_DEVICE) | - QEDR_UVERBS(QUERY_PORT) | - QEDR_UVERBS(ALLOC_PD) | - QEDR_UVERBS(DEALLOC_PD) | - QEDR_UVERBS(CREATE_COMP_CHANNEL) | - QEDR_UVERBS(CREATE_CQ) | - QEDR_UVERBS(RESIZE_CQ) | - QEDR_UVERBS(DESTROY_CQ) | - QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(CREATE_QP) | - QEDR_UVERBS(MODIFY_QP) | - QEDR_UVERBS(QUERY_QP) | - QEDR_UVERBS(DESTROY_QP) | - QEDR_UVERBS(CREATE_SRQ) | - QEDR_UVERBS(DESTROY_SRQ) | - QEDR_UVERBS(QUERY_SRQ) | - QEDR_UVERBS(MODIFY_SRQ) | - QEDR_UVERBS(POST_SRQ_RECV) | - QEDR_UVERBS(REG_MR) | - QEDR_UVERBS(DEREG_MR) | - QEDR_UVERBS(POLL_CQ) | - QEDR_UVERBS(POST_SEND) | - QEDR_UVERBS(POST_RECV); - if (IS_IWARP(dev)) { rc = qedr_iw_register_device(dev); if (rc) @@ -286,7 +263,8 @@ static int qedr_register_device(struct qedr_dev *dev) if (rc) return rc; - return ib_register_device(&dev->ibdev, "qedr%d"); + dma_set_max_seg_size(&dev->pdev->dev, UINT_MAX); + return ib_register_device(&dev->ibdev, "qedr%d", &dev->pdev->dev); } /* This function allocates fast-path status block memory */ @@ -602,7 +580,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev) qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx); /* Part 2 - check capabilities */ - page_size = ~dev->attr.page_size_caps + 1; + page_size = ~qed_attr->page_size_caps + 1; if (page_size > PAGE_SIZE) { DP_ERR(dev, "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n", @@ -705,6 +683,18 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) event.event = IB_EVENT_SRQ_ERR; event_type = EVENT_TYPE_SRQ; break; + case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; default: DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, roce_handle64); @@ -776,6 +766,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); + break; default: break; } @@ -1026,6 +1017,13 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event) case QEDE_CHANGE_ADDR: qedr_mac_address_change(dev); break; + case QEDE_CHANGE_MTU: + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + if (dev->ndev->mtu != dev->iwarp_max_mtu) + DP_NOTICE(dev, + "Mtu was changed from %d to %d. 
This will not take affect for iWARP until qedr is reloaded\n", + dev->iwarp_max_mtu, dev->ndev->mtu); + break; default: pr_err("Event not supported\n"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 460292179b32..9dde70373a55 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -310,6 +310,11 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; +struct qedr_xrcd { + struct ib_xrcd ibxrcd; + u16 xrcd_id; +}; + struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; @@ -361,6 +366,7 @@ struct qedr_srq { struct ib_umem *prod_umem; u16 srq_id; u32 srq_limit; + bool is_xrc; /* lock to protect srq recv post */ spinlock_t lock; }; @@ -573,6 +579,11 @@ static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd) return container_of(ibpd, struct qedr_pd, ibpd); } +static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct qedr_xrcd, ibxrcd); +} + static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq) { return container_of(ibcq, struct qedr_cq, ibcq); @@ -598,6 +609,28 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct qedr_srq, ibsrq); } +static inline bool qedr_qp_has_srq(struct qedr_qp *qp) +{ + return qp->srq; +} + +static inline bool qedr_qp_has_sq(struct qedr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) + return 0; + + return 1; +} + +static inline bool qedr_qp_has_rq(struct qedr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) + return 0; + + return 1; +} + static inline struct qedr_user_mmap_entry * get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) { diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 97fc7dd353b0..c4bc58736e48 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -727,6 +727,7 @@ int qedr_iw_destroy_listen(struct iw_cm_id *cm_id) listener->qed_handle); cm_id->rem_ref(cm_id); + kfree(listener); return rc; } @@ -736,7 +737,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc = 0; + int rc; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); @@ -759,8 +760,10 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ord = conn_param->ord; if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, - &qp->iwarp_cm_flags)) + &qp->iwarp_cm_flags)) { + rc = -EINVAL; goto err; /* QP already destroyed */ + } rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); if (rc) { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index b49bef94637e..0eb6a7a618e0 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -136,6 +136,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + if (!rdma_protocol_iwarp(&dev->ibdev, 1)) + attr->device_cap_flags |= IB_DEVICE_XRC; attr->max_send_sge = qattr->max_sge; attr->max_recv_sge = qattr->max_sge; attr->max_sge_rd = qattr->max_sge; @@ -157,13 +159,13 @@ int qedr_query_device(struct ib_device *ibdev, attr->local_ca_ack_delay = qattr->dev_ack_delay; attr->max_fast_reg_page_list_len = qattr->max_mr / 8; - 
attr->max_pkeys = QEDR_ROCE_PKEY_MAX; + attr->max_pkeys = qattr->max_pkey; attr->max_ah = qattr->max_ah; return 0; } -static inline void get_link_speed_and_width(int speed, u8 *ib_speed, +static inline void get_link_speed_and_width(int speed, u16 *ib_speed, u8 *ib_width) { switch (speed) { @@ -231,15 +233,16 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } attr->max_mtu = IB_MTU_4096; - attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); attr->lid = 0; attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu); attr->gid_tbl_len = 1; } else { + attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); attr->gid_tbl_len = QEDR_MAX_SGID; attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; } @@ -471,15 +474,33 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); + return 0; +} + + +int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibxrcd->device); + struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd); + + return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id); } +int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibxrcd->device); + u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id; + + dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id); + return 0; +} static void qedr_free_pbl(struct qedr_dev *dev, struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl) { @@ -600,11 +621,9 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, struct qedr_pbl_info *pbl_info, u32 pg_shift) { int pbe_cnt, total_num_pbes = 0; - u32 fw_pg_cnt, fw_pg_per_umem_pg; struct qedr_pbl *pbl_tbl; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct regpair *pbe; - u64 pg_addr; if (!pbl_info->num_pbes) return; @@ -625,32 +644,25 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, pbe_cnt = 0; - fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift); - - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - pg_addr = sg_page_iter_dma_address(&sg_iter); - for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { - pbe->lo = cpu_to_le32(pg_addr); - pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); + rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) { + u64 pg_addr = rdma_block_iter_dma_address(&biter); - pg_addr += BIT(pg_shift); - pbe_cnt++; - total_num_pbes++; - pbe++; + pbe->lo = cpu_to_le32(pg_addr); + pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); - if (total_num_pbes == pbl_info->num_pbes) - return; + pbe_cnt++; + total_num_pbes++; + pbe++; - /* If the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct regpair *)pbl_tbl->va; - pbe_cnt = 0; - } + if (total_num_pbes == pbl_info->num_pbes) + return; - fw_pg_cnt++; + /* If the given pbl is full storing the pbes, move to next pbl. 
+ */ + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; } } } @@ -792,9 +804,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, return PTR_ERR(q->umem); } - fw_pages = ib_umem_page_count(q->umem) << - (PAGE_SHIFT - FW_PAGE_SHIFT); - + fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT); rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0); if (rc) goto err0; @@ -918,6 +928,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, "create_cq: called from %s. entries=%d, vector=%d\n", udata ? "User Lib" : "Kernel", entries, vector); + if (attr->flags) + return -EOPNOTSUPP; + if (entries > QEDR_MAX_CQES) { DP_ERR(dev, "create cq: the number of entries %d is too high. Must be equal or below %d.\n", @@ -999,7 +1012,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, /* Generate doorbell address. */ cq->db.data.icid = cq->icid; cq->db_addr = dev->db_addr + db_offset; - cq->db.data.params = DB_AGG_CMD_SET << + cq->db.data.params = DB_AGG_CMD_MAX << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; /* point to the very last element, passing it we will toggle */ @@ -1051,7 +1064,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) -void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qed_rdma_destroy_cq_out_params oparams; @@ -1066,7 +1079,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) /* GSIs CQs are handled by driver, so they don't exist in the FW */ if (cq->cq_type == QEDR_CQ_TYPE_GSI) { qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); - return; + return 0; } iparams.icid = cq->icid; @@ -1114,6 +1127,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) * Since the destroy CQ ramrod has also been received on the EQ we can * be certain that there's no event handler in process. */ + return 0; } static inline int get_gid_info_from_table(struct ib_qp *ibqp, @@ -1146,7 +1160,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, SET_FIELD(qp_params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; - case RDMA_NETWORK_IB: + case RDMA_NETWORK_ROCE_V1: memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], @@ -1166,6 +1180,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); qp_params->roce_mode = ROCE_V2_IPV4; break; + default: + return -EINVAL; } for (i = 0; i < 4; i++) { @@ -1186,7 +1202,10 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, struct qedr_device_attr *qattr = &dev->attr; /* QP0... 
attrs->qp_type == IB_QPT_GSI */ - if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { + if (attrs->qp_type != IB_QPT_RC && + attrs->qp_type != IB_QPT_GSI && + attrs->qp_type != IB_QPT_XRC_INI && + attrs->qp_type != IB_QPT_XRC_TGT) { DP_DEBUG(dev, QEDR_MSG_QP, "create qp: unsupported qp type=0x%x requested\n", attrs->qp_type); @@ -1221,12 +1240,20 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return -EINVAL; } - /* Unprivileged user space cannot create special QP */ - if (udata && attrs->qp_type == IB_QPT_GSI) { - DP_ERR(dev, - "create qp: userspace can't create special QPs of type=0x%x\n", - attrs->qp_type); - return -EINVAL; + /* verify consumer QPs are not trying to use GSI QP's CQ. + * TGT QP isn't associated with RQ/SQ + */ + if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) && + (attrs->qp_type != IB_QPT_XRC_TGT)) { + struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq); + struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq); + + if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) || + (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) { + DP_ERR(dev, + "create qp: consumer QP cannot use GSI CQs.\n"); + return -EINVAL; + } } return 0; @@ -1248,8 +1275,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. */ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1291,8 +1318,12 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev, int rc; memset(uresp, 0, sizeof(*uresp)); - qedr_copy_sq_uresp(dev, uresp, qp); - qedr_copy_rq_uresp(dev, uresp, qp); + + if (qedr_qp_has_sq(qp)) + qedr_copy_sq_uresp(dev, uresp, qp); + + if (qedr_qp_has_rq(qp)) + qedr_copy_rq_uresp(dev, uresp, qp); uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; uresp->qp_id = qp->qp_id; @@ -1316,18 +1347,25 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, kref_init(&qp->refcnt); init_completion(&qp->iwarp_cm_comp); } + qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; - qp->sq.max_sges = attrs->cap.max_send_sge; qp->state = QED_ROCE_QP_STATE_RESET; qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
true : false; - qp->sq_cq = get_qedr_cq(attrs->send_cq); qp->dev = dev; + if (qedr_qp_has_sq(qp)) { + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->sq_cq = get_qedr_cq(attrs->send_cq); + DP_DEBUG(dev, QEDR_MSG_QP, + "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", + qp->sq.max_sges, qp->sq_cq->icid); + } - if (attrs->srq) { + if (attrs->srq) qp->srq = get_qedr_srq(attrs->srq); - } else { + + if (qedr_qp_has_rq(qp)) { qp->rq_cq = get_qedr_cq(attrs->recv_cq); qp->rq.max_sges = attrs->cap.max_recv_sge; DP_DEBUG(dev, QEDR_MSG_QP, @@ -1346,30 +1384,26 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { - int rc; + int rc = 0; - qp->sq.db = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); - qp->sq.db_data.data.icid = qp->icid + 1; - rc = qedr_db_recovery_add(dev, qp->sq.db, - &qp->sq.db_data, - DB_REC_WIDTH_32B, - DB_REC_KERNEL); - if (rc) - return rc; + if (qedr_qp_has_sq(qp)) { + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data, + DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc) + return rc; + } - if (!qp->srq) { + if (qedr_qp_has_rq(qp)) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; - - rc = qedr_db_recovery_add(dev, qp->rq.db, - &qp->rq.db_data, - DB_REC_WIDTH_32B, - DB_REC_KERNEL); - if (rc) - qedr_db_recovery_del(dev, qp->sq.db, - &qp->sq.db_data); + rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data, + DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc && qedr_qp_has_sq(qp)) + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); } return rc; @@ -1392,6 +1426,10 @@ static int qedr_check_srq_params(struct qedr_dev *dev, DP_ERR(dev, "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", attrs->attr.max_sge, qattr->max_sge); + } + + if (!udata && attrs->srq_type == IB_SRQT_XRC) { + DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n"); return -EINVAL; } @@ -1511,11 +1549,16 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, "create SRQ called from %s (pd %p)\n", (udata) ? 
"User lib" : "kernel", pd); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) return -EINVAL; srq->dev = dev; + srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC); hw_srq = &srq->hw_srq; spin_lock_init(&srq->lock); @@ -1557,6 +1600,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, in_params.prod_pair_addr = phy_prod_pair_addr; in_params.num_pages = page_cnt; in_params.page_size = page_size; + if (srq->is_xrc) { + struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd); + struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq); + + in_params.is_xrc = 1; + in_params.xrcd_id = xrcd->xrcd_id; + in_params.cq_cid = cq->icid; + } rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); if (rc) @@ -1591,7 +1642,7 @@ err0: return -EFAULT; } -void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params in_params = {}; struct qedr_dev *dev = get_qedr_dev(ibsrq->device); @@ -1599,6 +1650,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) xa_erase_irq(&dev->srqs, srq->srq_id); in_params.srq_id = srq->srq_id; + in_params.is_xrc = srq->is_xrc; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); if (ibsrq->uobject) @@ -1609,6 +1661,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) DP_DEBUG(dev, QEDR_MSG_SRQ, "destroy srq: destroyed srq with srq_id=0x%0x\n", srq->srq_id); + return 0; } int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -1649,6 +1702,20 @@ int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type) +{ + switch (ib_qp_type) { + case IB_QPT_RC: + return QED_RDMA_QP_TYPE_RC; + case IB_QPT_XRC_INI: + return QED_RDMA_QP_TYPE_XRC_INI; + case IB_QPT_XRC_TGT: + return QED_RDMA_QP_TYPE_XRC_TGT; + default: + return QED_RDMA_QP_TYPE_INVAL; + } +} + static inline void qedr_init_common_qp_in_params(struct qedr_dev *dev, struct qedr_pd *pd, @@ -1663,20 +1730,27 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev, params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; - params->pd = pd->pd_id; - params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; - params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type); params->stats_queue = 0; - params->srq_id = 0; - params->use_srq = false; - if (!qp->srq) { + if (pd) { + params->pd = pd->pd_id; + params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; + } + + if (qedr_qp_has_sq(qp)) + params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + + if (qedr_qp_has_rq(qp)) params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; - } else { + if (qedr_qp_has_srq(qp)) { params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; params->srq_id = qp->srq->srq_id; params->use_srq = true; + } else { + params->srq_id = 0; + params->use_srq = false; } } @@ -1690,8 +1764,10 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) "rq_len=%zd" "\n", qp, - qp->usq.buf_addr, - qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); + qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0, + qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0, + qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0, + qedr_qp_has_sq(qp) ? 
qp->urq.buf_len : 0); } static inline void @@ -1717,11 +1793,15 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_ucontext *ctx, struct qedr_qp *qp) { - ib_umem_release(qp->usq.umem); - qp->usq.umem = NULL; + if (qedr_qp_has_sq(qp)) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + } - ib_umem_release(qp->urq.umem); - qp->urq.umem = NULL; + if (qedr_qp_has_rq(qp)) { + ib_umem_release(qp->urq.umem); + qp->urq.umem = NULL; + } if (rdma_protocol_roce(&dev->ibdev, 1)) { qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); @@ -1756,28 +1836,38 @@ static int qedr_create_user_qp(struct qedr_dev *dev, { struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; - struct qedr_pd *pd = get_qedr_pd(ibpd); - struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = pd ? pd->uctx : NULL; - struct qedr_create_qp_ureq ureq; + struct qedr_create_qp_uresp uresp = {}; + struct qedr_create_qp_ureq ureq = {}; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); - int rc = -EINVAL; + struct qedr_ucontext *ctx = NULL; + struct qedr_pd *pd = NULL; + int rc = 0; qp->create_type = QEDR_QP_CREATE_USER; - memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); - if (rc) { - DP_ERR(dev, "Problem copying data from user space\n"); - return rc; + + if (ibpd) { + pd = get_qedr_pd(ibpd); + ctx = pd->uctx; } - /* SQ - read access only (0) */ - rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, true, 0, alloc_and_init); - if (rc) - return rc; + if (udata) { + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying data from user space\n"); + return rc; + } + } - if (!qp->srq) { + if (qedr_qp_has_sq(qp)) { + /* SQ - read access only (0) */ + rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, + ureq.sq_len, true, 0, alloc_and_init); + if (rc) + return rc; + } + + if (qedr_qp_has_rq(qp)) { /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); @@ -1789,9 +1879,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); in_params.qp_handle_lo = ureq.qp_handle_lo; in_params.qp_handle_hi = ureq.qp_handle_hi; - in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; - in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; - if (!qp->srq) { + + if (qp->qp_type == IB_QPT_XRC_TGT) { + struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd); + + in_params.xrcd_id = xrcd->xrcd_id; + in_params.qp_handle_lo = qp->qp_id; + in_params.use_srq = 1; + } + + if (qedr_qp_has_sq(qp)) { + in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; + in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; + } + + if (qedr_qp_has_rq(qp)) { in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } @@ -1813,41 +1915,43 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); - if (rc) - goto err; + if (udata) { + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; + } /* db offset was calculated in copy_qp_uresp, now set in the user q */ - ctx = pd->uctx; - qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; - qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + if (qedr_qp_has_sq(qp)) { + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + rc = 
qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } + + if (qedr_qp_has_rq(qp)) { + qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } if (rdma_protocol_iwarp(&dev->ibdev, 1)) { qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; /* calculate the db_rec_db2 data since it is constant so no - * need to reflect from user + * need to reflect from user */ qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); qp->urq.db_rec_db2_data.data.value = cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); - } - rc = qedr_db_recovery_add(dev, qp->usq.db_addr, - &qp->usq.db_rec_data->db_data, - DB_REC_WIDTH_32B, - DB_REC_USER); - if (rc) - goto err; - - rc = qedr_db_recovery_add(dev, qp->urq.db_addr, - &qp->urq.db_rec_data->db_data, - DB_REC_WIDTH_32B, - DB_REC_USER); - if (rc) - goto err; - - if (rdma_protocol_iwarp(&dev->ibdev, 1)) { rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, &qp->urq.db_rec_db2_data, DB_REC_WIDTH_32B, @@ -1856,7 +1960,6 @@ static int qedr_create_user_qp(struct qedr_dev *dev, goto err; } qedr_qp_user_print(dev, qp); - return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2112,16 +2215,50 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, return rc; } +static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, + struct ib_udata *udata) +{ + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; + + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); + if (rc) + return rc; + } + + if (qp->create_type == QEDR_QP_CREATE_USER) + qedr_cleanup_user(dev, ctx, qp); + else + qedr_cleanup_kernel(dev, qp); + + return 0; +} + struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { - struct qedr_dev *dev = get_qedr_dev(ibpd->device); - struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_xrcd *xrcd = NULL; + struct qedr_pd *pd = NULL; + struct qedr_dev *dev; struct qedr_qp *qp; struct ib_qp *ibqp; int rc = 0; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + + if (attrs->qp_type == IB_QPT_XRC_TGT) { + xrcd = get_qedr_xrcd(attrs->xrcd); + dev = get_qedr_dev(xrcd->ibxrcd.device); + } else { + pd = get_qedr_pd(ibpd); + dev = get_qedr_dev(ibpd->device); + } + DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? 
"user library" : "kernel", pd); @@ -2152,25 +2289,27 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, return ibqp; } - if (udata) + if (udata || xrcd) rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs); else rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs); if (rc) - goto err; + goto out_free_qp; qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) - goto err; + goto out_free_qp_resources; } return &qp->ibqp; -err: +out_free_qp_resources: + qedr_free_qp_resources(dev, qp, udata); +out_free_qp: kfree(qp); return ERR_PTR(-EFAULT); @@ -2348,6 +2487,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, attr->qp_state); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + old_qp_state = qedr_get_ibqp_state(qp->state); if (attr_mask & IB_QP_STATE) new_qp_state = attr->qp_state; @@ -2636,7 +2778,7 @@ int qedr_query_qp(struct ib_qp *ibqp, qp_attr->cap.max_recv_wr = qp->rq.max_wr; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; + qp_attr->cap.max_inline_data = dev->attr.max_inline; qp_init_attr->cap = qp_attr->cap; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; @@ -2671,28 +2813,6 @@ err: return rc; } -static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_udata *udata) -{ - struct qedr_ucontext *ctx = - rdma_udata_to_drv_context(udata, struct qedr_ucontext, - ibucontext); - int rc; - - if (qp->qp_type != IB_QPT_GSI) { - rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); - if (rc) - return rc; - } - - if (qp->create_type == QEDR_QP_CREATE_USER) - qedr_cleanup_user(dev, ctx, qp); - else - qedr_cleanup_kernel(dev, qp); - - return 0; -} - int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -2752,6 +2872,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (rdma_protocol_iwarp(&dev->ibdev, 1)) qedr_iw_qp_rem_ref(&qp->ibqp); + else + kfree(qp); return 0; } @@ -2766,11 +2888,12 @@ int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return 0; } -void qedr_destroy_ah(struct ib_ah *ibah, u32 flags) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); rdma_destroy_ah_attr(&ah->attr); + return 0; } static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) @@ -2861,7 +2984,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, goto err0; } - rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + rc = init_mr_info(dev, &mr->info, + ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1); if (rc) goto err1; @@ -2888,10 +3012,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); mr->hw_mr.page_size_log = PAGE_SHIFT; - mr->hw_mr.fbo = ib_umem_offset(mr->umem); mr->hw_mr.length = len; mr->hw_mr.vaddr = usr_addr; - mr->hw_mr.zbva = false; mr->hw_mr.phy_mr = false; mr->hw_mr.dma_mr = false; @@ -2984,10 +3106,8 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, mr->hw_mr.pbl_ptr = 0; mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); - mr->hw_mr.fbo = 0; mr->hw_mr.length = 0; mr->hw_mr.vaddr = 0; - 
mr->hw_mr.zbva = false; mr->hw_mr.phy_mr = true; mr->hw_mr.dma_mr = false; @@ -3765,10 +3885,10 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod); /* Make sure sge producer is updated first */ dma_wmb(); - srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod); wr = wr->next; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 39dd6286ba39..2672c32bc2f7 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,12 +47,13 @@ void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); - +int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); +int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, struct ib_udata *); @@ -67,12 +68,12 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr, int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 432d6d0fd7f4..ee211423058a 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -619,11 +619,11 @@ struct qib_pportdata { /* LID mask control */ u8 lmc; u8 link_width_supported; - u8 link_speed_supported; + u16 link_speed_supported; u8 link_width_enabled; - u8 link_speed_enabled; + u16 link_speed_enabled; u8 link_width_active; - u8 link_speed_active; + u16 link_speed_active; u8 vls_supported; u8 vls_operational; /* Rx Polarity inversion (compensate for ~tx on partner) */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index a10eab89aee4..189a0ce6056a 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1733,9 +1733,9 @@ done: return; } -static void 
qib_error_tasklet(unsigned long data) +static void qib_error_tasklet(struct tasklet_struct *t) { - struct qib_devdata *dd = (struct qib_devdata *)data; + struct qib_devdata *dd = from_tasklet(dd, t, error_tasklet); handle_7322_errors(dd); qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); @@ -3537,8 +3537,7 @@ try_intx: for (i = 0; i < ARRAY_SIZE(redirect); i++) qib_write_kreg(dd, kr_intredirect + i, redirect[i]); dd->cspec->main_int_mask = mask; - tasklet_init(&dd->error_tasklet, qib_error_tasklet, - (unsigned long)dd); + tasklet_setup(&dd->error_tasklet, qib_error_tasklet); } /** diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index e7789e724f56..f83e331977f8 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2293,76 +2293,50 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - int ret; - *out_mad = *in_mad; if (ccp->class_version != 2) { ccp->status |= IB_SMP_UNSUP_VERSION; - ret = reply((struct ib_smp *)ccp); - goto bail; + return reply((struct ib_smp *)ccp); } switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { case IB_CC_ATTR_CLASSPORTINFO: - ret = cc_get_classportinfo(ccp, ibdev); - goto bail; - + return cc_get_classportinfo(ccp, ibdev); case IB_CC_ATTR_CONGESTION_INFO: - ret = cc_get_congestion_info(ccp, ibdev, port); - goto bail; - + return cc_get_congestion_info(ccp, ibdev, port); case IB_CC_ATTR_CA_CONGESTION_SETTING: - ret = cc_get_congestion_setting(ccp, ibdev, port); - goto bail; - + return cc_get_congestion_setting(ccp, ibdev, port); case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: - ret = cc_get_congestion_control_table(ccp, ibdev, port); - goto bail; - - fallthrough; + return cc_get_congestion_control_table(ccp, ibdev, port); default: ccp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) ccp); - goto bail; + return reply((struct ib_smp *) ccp); } - case IB_MGMT_METHOD_SET: switch (ccp->attr_id) { case IB_CC_ATTR_CA_CONGESTION_SETTING: - ret = cc_set_congestion_setting(ccp, ibdev, port); - goto bail; - + return cc_set_congestion_setting(ccp, ibdev, port); case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: - ret = cc_set_congestion_control_table(ccp, ibdev, port); - goto bail; - - fallthrough; + return cc_set_congestion_control_table(ccp, ibdev, port); default: ccp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) ccp); - goto bail; + return reply((struct ib_smp *) ccp); } - case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - - case IB_MGMT_METHOD_TRAP: - default: - ccp->status |= IB_SMP_UNSUP_METHOD; - ret = reply((struct ib_smp *) ccp); + return IB_MAD_RESULT_SUCCESS; } -bail: - return ret; + /* method is unsupported */ + ccp->status |= IB_SMP_UNSUP_METHOD; + return reply((struct ib_smp *) ccp); } /** diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 3dc6ce033319..2e07b3749b88 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -90,25 +90,18 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) { /* * If the 64 bit setup fails, try 32 bit. 
Some systems * do not setup 64 bit maps on systems with 2GB or less * memory installed. */ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); goto bail; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - qib_early_err(&pdev->dev, - "Unable to set DMA consistent mask: %d\n", ret); - goto bail; } pci_set_master(pdev); diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 8f8d61736656..5e86cbf7d70e 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -62,7 +62,7 @@ static void sdma_get(struct qib_sdma_state *); static void sdma_put(struct qib_sdma_state *); static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states); static void sdma_start_sw_clean_up(struct qib_pportdata *); -static void sdma_sw_clean_up_task(unsigned long); +static void sdma_sw_clean_up_task(struct tasklet_struct *); static void unmap_desc(struct qib_pportdata *, unsigned); static void sdma_get(struct qib_sdma_state *ss) @@ -119,9 +119,10 @@ static void clear_sdma_activelist(struct qib_pportdata *ppd) } } -static void sdma_sw_clean_up_task(unsigned long opaque) +static void sdma_sw_clean_up_task(struct tasklet_struct *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *) opaque; + struct qib_pportdata *ppd = from_tasklet(ppd, t, + sdma_sw_clean_up_task); unsigned long flags; spin_lock_irqsave(&ppd->sdma_lock, flags); @@ -436,8 +437,7 @@ int qib_setup_sdma(struct qib_pportdata *ppd) INIT_LIST_HEAD(&ppd->sdma_activelist); - tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)ppd); + tasklet_setup(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task); ret = dd->f_init_sdma_regs(ppd); if (ret) diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 021df0654ba7..62c179fc764b 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -43,11 +43,8 @@ static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) { struct qib_devdata *dd = ppd->dd; - int ret; - ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; + return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT)); } static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, @@ -106,14 +103,10 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, static ssize_t show_status(struct qib_pportdata *ppd, char *buf) { - ssize_t ret; - if (!ppd->statusp) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(ppd->statusp)); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp)); } /* @@ -392,7 +385,7 @@ static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct qib_pportdata, sl2vl_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); + return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); } static const struct sysfs_ops qib_sl2vl_ops = { @@ -501,17 +494,18 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp 
= &ppd->ibport_data; + u64 val; if (!strncmp(dattr->attr.name, "rc_acks", 7)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); + val = READ_PER_CPU_CNTR(rc_acks); else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); + val = READ_PER_CPU_CNTR(rc_qacks); else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) - return sprintf(buf, "%llu\n", - READ_PER_CPU_CNTR(rc_delayed_comp)); + val = READ_PER_CPU_CNTR(rc_delayed_comp); else - return sprintf(buf, "%u\n", - *(u32 *)((char *)qibp + dattr->counter)); + val = *(u32 *)((char *)qibp + dattr->counter); + + return sysfs_emit(buf, "%llu\n", val); } static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, @@ -565,7 +559,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -575,13 +569,10 @@ static ssize_t hca_type_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(hca_type); static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); @@ -590,7 +581,7 @@ static ssize_t version_show(struct device *device, struct device_attribute *attr, char *buf) { /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); + return sysfs_emit(buf, "%s", (char *)ib_qib_version); } static DEVICE_ATTR_RO(version); @@ -602,7 +593,7 @@ static ssize_t boardversion_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -614,7 +605,7 @@ static ssize_t localbus_info_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); + return sysfs_emit(buf, "%s", dd->lbus_info); } static DEVICE_ATTR_RO(localbus_info); @@ -628,9 +619,10 @@ static ssize_t nctxts_show(struct device *device, /* Return the number of user ports (contexts) available. */ /* The calculation below deals with a special case where * cfgctxts is set to 1 on a single-port board. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->first_user_ctxt > dd->cfgctxts) ? 0 : - (dd->cfgctxts - dd->first_user_ctxt)); + return sysfs_emit(buf, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? + 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static DEVICE_ATTR_RO(nctxts); @@ -642,21 +634,20 @@ static ssize_t nfreectxts_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); -static ssize_t serial_show(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *device, struct device_attribute *attr, + char *buf) { struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); + const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial)); + int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial); - buf[sizeof(dd->serial)] = '\0'; - memcpy(buf, dd->serial, sizeof(dd->serial)); - strcat(buf, "\n"); - return strlen(buf); + return sysfs_emit(buf, ".%*s\n", size, dd->serial); } static DEVICE_ATTR_RO(serial); @@ -689,27 +680,26 @@ static ssize_t tempsense_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; - int idx; + int i; u8 regvals[8]; - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) + for (i = 0; i < 8; i++) { + int ret; + + if (i == 6) continue; - ret = dd->f_tempsense_rd(dd, idx); + ret = dd->f_tempsense_rd(dd, i); if (ret < 0) - break; - regvals[idx] = ret; + return ret; /* return error on bad read */ + regvals[i] = ret; } - if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; + return sysfs_emit(buf, "%d %d %02X %02X %d %d\n", + (signed char)regvals[0], + (signed char)regvals[1], + regvals[2], + regvals[3], + (signed char)regvals[5], + (signed char)regvals[7]); } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 662e7fc7f628..1b63a491fa72 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -315,7 +315,6 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -355,7 +354,6 @@ static const struct ib_device_ops usnic_dev_ops = { .modify_qp = usnic_ib_modify_qp, .query_device = usnic_ib_query_device, .query_gid = usnic_ib_query_gid, - .query_pkey = usnic_ib_query_pkey, .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, @@ -400,25 +398,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << 
IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); @@ -427,7 +406,8 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (ret) goto err_fwd_dealloc; - if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d")) + dma_set_max_seg_size(&dev->dev, SZ_2G); + if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", &dev->dev)) goto err_fwd_dealloc; usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index c85d48ae7442..586b0e52ba7f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device, subsystem_device_id = us_ibdev->pdev->subsystem_device; mutex_unlock(&us_ibdev->usdev_lock); - return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); + return sysfs_emit(buf, "%u\n", subsystem_device_id); } static DEVICE_ATTR_RO(board_id); @@ -69,19 +69,13 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - char *ptr; - unsigned left; - unsigned n; enum usnic_vnic_res_type res_type; - - /* Buffer space limit is 1 page */ - ptr = buf; - left = PAGE_SIZE; + int len; mutex_lock(&us_ibdev->usdev_lock); if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; - + char *sep = ""; /* * bus name seems to come with annoying prefix. * Remove it if it is predictable @@ -90,39 +84,35 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) if (strncmp(busname, "PCI Bus ", 8) == 0) busname += 8; - n = scnprintf(ptr, left, - "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", - dev_name(&us_ibdev->ib_dev.dev), - busname, - PCI_SLOT(us_ibdev->pdev->devfn), - PCI_FUNC(us_ibdev->pdev->devfn), - netdev_name(us_ibdev->netdev), - us_ibdev->ufdev->mac, - kref_read(&us_ibdev->vf_cnt)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n", + dev_name(&us_ibdev->ib_dev.dev), + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, + kref_read(&us_ibdev->vf_cnt)); + len += sysfs_emit_at(buf, len, " Per VF:"); for (res_type = USNIC_VNIC_RES_TYPE_EOL; - res_type < USNIC_VNIC_RES_TYPE_MAX; - res_type++) { + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { if (us_ibdev->vf_res_cnt[res_type] == 0) continue; - n = scnprintf(ptr, left, " %d %s%s", - us_ibdev->vf_res_cnt[res_type], - usnic_vnic_res_type_to_str(res_type), - (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 
- "," : ""); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "%s %d %s", + sep, + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type)); + sep = ","; } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); } else { - n = scnprintf(ptr, left, "%s: no VFs\n", - dev_name(&us_ibdev->ib_dev.dev)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: no VFs\n", + dev_name(&us_ibdev->ib_dev.dev)); } + mutex_unlock(&us_ibdev->usdev_lock); - return ptr - buf; + return len; } static DEVICE_ATTR_RO(config); @@ -132,8 +122,7 @@ iface_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", - netdev_name(us_ibdev->netdev)); + return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev)); } static DEVICE_ATTR_RO(iface); @@ -143,8 +132,7 @@ max_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - kref_read(&us_ibdev->vf_cnt)); + return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt)); } static DEVICE_ATTR_RO(max_vf); @@ -158,8 +146,7 @@ qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); - return scnprintf(buf, PAGE_SIZE, - "%d\n", qp_per_vf); + return sysfs_emit(buf, "%d\n", qp_per_vf); } static DEVICE_ATTR_RO(qp_per_vf); @@ -169,8 +156,8 @@ cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); + return sysfs_emit(buf, "%d\n", + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } static DEVICE_ATTR_RO(cq_per_vf); @@ -217,43 +204,35 @@ struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); + return sysfs_emit(buf, "0x%p\n", qp_grp->ctx); } static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - int i, j, n; - int left; - char *ptr; + int i, j; struct usnic_vnic_res_chunk *res_chunk; struct usnic_vnic_res *vnic_res; + int len; - left = PAGE_SIZE; - ptr = buf; - - n = scnprintf(ptr, left, - "QPN: %d State: (%s) PID: %u VF Idx: %hu ", - qp_grp->ibqp.qp_num, - usnic_ib_qp_grp_state_to_string(qp_grp->state), - qp_grp->owner_pid, - usnic_vnic_get_index(qp_grp->vf->vnic)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic)); for (i = 0; qp_grp->res_chunk_list[i]; i++) { res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - n = scnprintf(ptr, left, "%s[%d] ", + len += sysfs_emit_at(buf, len, " %s[%d]", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); - UPDATE_PTR_LEFT(n, ptr, left); } } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); - return ptr - buf; + return len; } static QPN_ATTR_RO(context); diff 
--git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index b8a77ce11590..3705c6b8b223 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } usnic_uiom_free_dev_list(dev_list); + dev_list = NULL; } /* Try to find resources on an unused vf */ @@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, qp_grp_check: if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); + if (usnic_ib_share_vf) + usnic_uiom_free_dev_list(dev_list); return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); } return qp_grp; @@ -367,7 +370,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = 0; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; props->max_mtu = IB_MTU_4096; @@ -437,16 +439,6 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - if (index > 0) - return -EINVAL; - - *pkey = 0xffff; - return 0; -} - int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); @@ -460,9 +452,10 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); + return 0; } struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, @@ -484,7 +477,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, us_ibdev = to_usdev(pd->device); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { @@ -567,6 +560,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int status; usnic_dbg("\n"); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp_grp = to_uqp_grp(ibqp); mutex_lock(&qp_grp->vf->pf->usdev_lock); @@ -591,14 +587,14 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; return 0; } -void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - return; + return 0; } struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2aedf78c13cf..11fe1ba6bbc9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -48,10 +48,8 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, struct ib_qp_init_attr *qp_init_attr); int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); -int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey); int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -60,7 +58,7 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr 
*attr, int attr_mask, struct ib_udata *udata); int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index c142f5e7f25f..de57f2fed743 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -509,6 +509,20 @@ static inline int ib_send_flags_to_pvrdma(int flags) return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX); } +static inline int pvrdma_network_type_to_ib(enum pvrdma_network_type type) +{ + switch (type) { + case PVRDMA_NETWORK_ROCE_V1: + return RDMA_NETWORK_ROCE_V1; + case PVRDMA_NETWORK_IPV4: + return RDMA_NETWORK_IPV4; + case PVRDMA_NETWORK_IPV6: + return RDMA_NETWORK_IPV6; + default: + return RDMA_NETWORK_IPV6; + } +} + void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src); void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 4f6cc0de7ef9..6aa40bd2fd52 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -119,6 +119,9 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + if (attr->flags) + return -EOPNOTSUPP; + entries = roundup_pow_of_two(entries); if (entries < 1 || entries > dev->dsr->caps.max_cqe) return -EINVAL; @@ -142,7 +145,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cq; } - npages = ib_umem_page_count(cq->umem); + npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE); } else { /* One extra page for shared ring state */ npages = 1 + (entries * sizeof(struct pvrdma_cqe) + @@ -235,7 +238,7 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) * @cq: the completion queue to destroy. 
* @udata: user data or null for kernel object */ -void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct pvrdma_cq *vcq = to_vcq(cq); union pvrdma_cmd_req req; @@ -261,6 +264,7 @@ void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) pvrdma_free_cq(dev, vcq); atomic_dec(&dev->num_cqs); + return 0; } static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) @@ -363,7 +367,7 @@ retry: wc->dlid_path_bits = cqe->dlid_path_bits; wc->port_num = cqe->port_num; wc->vendor_err = cqe->vendor_err; - wc->network_hdr_type = cqe->network_hdr_type; + wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type); /* Update shared ring state */ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); @@ -375,7 +379,7 @@ retry: * pvrdma_poll_cq - poll for work completion queue entries * @ibcq: completion queue * @num_entries: the maximum number of entries - * @entry: pointer to work completion array + * @wc: pointer to work completion array * * @return: number of polled completion entries */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 780fd2dfc07e..00a330909bb3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -68,21 +68,21 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); + return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); } static DEVICE_ATTR_RO(hca_type); static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_REV_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); } static DEVICE_ATTR_RO(hw_rev); static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); } static DEVICE_ATTR_RO(board_id); @@ -205,27 +205,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->flags = 0; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; @@ -249,13 +228,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) /* Check if SRQ is supported by backend */ if (dev->dsr->caps.max_srq) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull 
<< IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, @@ -266,11 +238,11 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) } ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1); if (ret) - return ret; + goto err_srq_free; spin_lock_init(&dev->srq_tbl_lock); rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); - ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d"); + ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev); if (ret) goto err_srq_free; @@ -854,7 +826,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, goto err_free_resource; } } - + dma_set_max_seg_size(&pdev->dev, UINT_MAX); pci_set_master(pdev); /* Map register space */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c index 7944c58ded0e..ba43ad07898c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -182,17 +182,16 @@ int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, struct ib_umem *umem, u64 offset) { + struct ib_block_iter biter; u64 i = offset; int ret = 0; - struct sg_dma_page_iter sg_iter; if (offset >= pdir->npages) return -EINVAL; - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - dma_addr_t addr = sg_page_iter_dma_address(&sg_iter); - - ret = pvrdma_page_dir_insert_dma(pdir, i, addr); + rdma_umem_for_each_dma_block (umem, &biter, PAGE_SIZE) { + ret = pvrdma_page_dir_insert_dma( + pdir, i, rdma_block_iter_dma_address(&biter)); if (ret) goto exit; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index 77a010e68208..e80848bfb3bd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_CAST(umem); } - npages = ib_umem_num_pages(umem); + npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE); if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", npages); @@ -270,6 +270,7 @@ freemr: /** * pvrdma_dereg_mr - deregister a memory region * @ibmr: memory region + * @udata: pointer to user data * * @return: 0 on success. 
*/ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 9a8f2a9507be..1d3bdd7bb51d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -209,7 +209,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, dev_warn(&dev->pdev->dev, "invalid create queuepair flags %#x\n", init_attr->create_flags); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (init_attr->qp_type != IB_QPT_RC && @@ -232,8 +232,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_GSI: if (init_attr->port_num == 0 || - init_attr->port_num > pd->device->phys_port_cnt || - udata) { + init_attr->port_num > pd->device->phys_port_cnt) { dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n"); ret = -EINVAL; goto err_qp; @@ -298,9 +297,11 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } - qp->npages_send = ib_umem_page_count(qp->sumem); + qp->npages_send = + ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE); if (!is_srq) - qp->npages_recv = ib_umem_page_count(qp->rumem); + qp->npages_recv = ib_umem_num_dma_blocks( + qp->rumem, PAGE_SIZE); else qp->npages_recv = 0; qp->npages = qp->npages_send + qp->npages_recv; @@ -543,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, next_state; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* Sanity checking. Should need lock here */ mutex_lock(&qp->mutex); cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index d330decfb80a..bdc2703532c6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -90,7 +90,7 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) /** * pvrdma_create_srq - create shared receive queue - * @pd: protection domain + * @ibsrq: the IB shared receive queue * @init_attr: shared receive queue attributes * @udata: user data * @@ -121,7 +121,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return -EINVAL; + return -EOPNOTSUPP; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || @@ -152,7 +152,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->npages = ib_umem_page_count(srq->umem); + srq->npages = ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE); if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, @@ -240,7 +240,7 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) * * @return: 0 for success. 
*/ -void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct pvrdma_srq *vsrq = to_vsrq(srq); union pvrdma_cmd_req req; @@ -259,6 +259,7 @@ void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) ret); pvrdma_free_srq(dev, vsrq); + return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index ccbded2d26ce..fc412cbfd042 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -479,9 +479,9 @@ err: * @pd: the protection domain to be released * @udata: user data or null for kernel object * - * @return: 0 on success, otherwise errno. + * @return: Always 0 */ -void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); union pvrdma_cmd_req req = {}; @@ -498,14 +498,14 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) ret); atomic_dec(&dev->num_pds); + return 0; } /** * pvrdma_create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * @udata: user data blob - * @flags: create address handle flags (see enum rdma_create_ah_flags) + * @ibah: the IB address handle + * @init_attr: the attributes of the AH + * @udata: pointer to user data * * @return: 0 on success, otherwise errno. */ @@ -548,9 +548,10 @@ int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * */ -void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); atomic_dec(&dev->num_ahs); + return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 699b20849a7e..97ed8f952f6e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -399,7 +399,7 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void pvrdma_dealloc_ucontext(struct ib_ucontext *context); int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, @@ -411,19 +411,19 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata 
*udata); int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
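The qib PCI setup hunk above is part of the tree-wide move from pci_set_dma_mask()/pci_set_consistent_dma_mask() to a single dma_set_mask_and_coherent() call. A minimal sketch of the usual 64-bit-with-32-bit-fallback pattern; the function and driver names here are illustrative, not taken from the patch:

#include <linux/pci.h>
#include <linux/dma-mapping.h>

static int mydrv_init_dma(struct pci_dev *pdev)
{
        int ret;

        /* One call now sets both the streaming and the coherent mask. */
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (ret)
                /* Some systems do not set up 64-bit maps; fall back to 32 bit. */
                ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        return ret;
}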
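The qib_sdma.c hunk follows the kernel-wide tasklet API conversion: tasklet_init() with an unsigned long cookie becomes tasklet_setup(), and the callback recovers its container with from_tasklet(). A sketch of the generic pattern, with hypothetical structure and field names:

#include <linux/interrupt.h>

struct my_port {
        struct tasklet_struct clean_up_task;
        /* ... driver state ... */
};

static void my_clean_up_task(struct tasklet_struct *t)
{
        /* from_tasklet() is container_of() keyed on the tasklet field. */
        struct my_port *port = from_tasklet(port, t, clean_up_task);

        /* ... deferred clean-up using "port" ... */
}

static void my_port_setup(struct my_port *port)
{
        /* The callback receives the tasklet itself, not an opaque cookie. */
        tasklet_setup(&port->clean_up_task, my_clean_up_task);
}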
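Most of the sysfs churn in qib, usnic and pvrdma above replaces sprintf()/scnprintf(buf, PAGE_SIZE, ...) with sysfs_emit() and sysfs_emit_at(), which encode the one-page sysfs contract and warn if the buffer is not page aligned. A minimal sketch of both helpers in hypothetical device attributes:

#include <linux/device.h>
#include <linux/sysfs.h>

/* Single value: sysfs_emit() bounds the output to PAGE_SIZE internally. */
static ssize_t flavour_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
{
        return sysfs_emit(buf, "%d\n", 42);     /* value is illustrative */
}
static DEVICE_ATTR_RO(flavour);

/* Multi-part line: sysfs_emit_at() appends at an explicit offset. */
static ssize_t parts_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
{
        int len, i;

        len = sysfs_emit(buf, "parts:");
        for (i = 0; i < 3; i++)
                len += sysfs_emit_at(buf, len, " %d", i);
        len += sysfs_emit_at(buf, len, "\n");
        return len;
}
static DEVICE_ATTR_RO(parts);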
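usnic and pvrdma also drop their hand-rolled uverbs_cmd_mask initialisation (the core now derives the supported commands from the populated ib_device_ops), set the DMA segment size on the parent device, and pass that device to the three-argument ib_register_device(). A sketch of the resulting registration sequence with made-up driver names; the segment size is a per-driver choice (usnic uses SZ_2G, pvrdma UINT_MAX):

#include <linux/dma-mapping.h>
#include <linux/sizes.h>
#include <rdma/ib_verbs.h>

static int mydrv_register(struct ib_device *ibdev, struct device *dma_dev)
{
        dma_set_max_seg_size(dma_dev, SZ_2G);

        /* The DMA-capable parent device is now an explicit argument. */
        return ib_register_device(ibdev, "mydrv_%d", dma_dev);
}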
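Several object-teardown verbs (dealloc_pd, destroy_ah, destroy_cq, destroy_srq) change from void to int across the drivers above so the core can observe a failure; drivers whose teardown cannot fail simply return 0. A sketch of the convention with hypothetical callbacks:

#include <rdma/ib_verbs.h>

static int mydrv_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        /* release driver-private PD state */
        return 0;
}

static int mydrv_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
        /* tear down CQ resources; nothing here can fail */
        return 0;
}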
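Another recurring change is the error code for features a driver does not implement: unsupported create_flags and unknown modify-QP attribute bits now return -EOPNOTSUPP instead of -EINVAL, with IB_QP_ATTR_STANDARD_BITS masking the attributes the core defines. A sketch, again with hypothetical callbacks:

#include <rdma/ib_verbs.h>

static int mydrv_create_cq(struct ib_cq *ibcq,
                           const struct ib_cq_init_attr *attr,
                           struct ib_udata *udata)
{
        if (attr->flags)
                return -EOPNOTSUPP;     /* no CQ creation flags supported */
        /* ... allocate the CQ ... */
        return 0;
}

static int mydrv_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                           int attr_mask, struct ib_udata *udata)
{
        if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
                return -EOPNOTSUPP;     /* reject attribute bits we don't handle */
        /* ... apply the standard attributes ... */
        return 0;
}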
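Finally, the pvrdma memory-registration paths stop counting umem pages and walking the scatterlist by hand: sizes come from ib_umem_num_dma_blocks() and DMA addresses from rdma_umem_for_each_dma_block(), which iterates in fixed PAGE_SIZE blocks regardless of how the SGEs were combined. A sketch of the pattern; mydrv_fill_page_list() and its arguments are hypothetical:

#include <linux/errno.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int mydrv_fill_page_list(struct ib_umem *umem, u64 *pages,
                                size_t max_pages)
{
        struct ib_block_iter biter;
        size_t i = 0;

        /* One entry per PAGE_SIZE DMA block of the user memory. */
        if (ib_umem_num_dma_blocks(umem, PAGE_SIZE) > max_pages)
                return -EINVAL;

        rdma_umem_for_each_dma_block(umem, &biter, PAGE_SIZE)
                pages[i++] = rdma_block_iter_dma_address(&biter);

        return 0;
}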
