-rw-r--r--  drivers/infiniband/core/cma.c   44
-rw-r--r--  drivers/infiniband/core/uverbs.h   4
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c   250
-rw-r--r--  drivers/infiniband/core/uverbs_main.c   42
-rw-r--r--  drivers/infiniband/core/verbs.c   30
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_ae.c   18
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_cm.c   16
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c   46
-rw-r--r--  drivers/infiniband/hw/cxgb4/Kconfig   2
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c   860
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c   329
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c   116
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c   10
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h   9
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c   41
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h   29
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c   235
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h   12
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c   153
-rw-r--r--  drivers/infiniband/hw/qib/qib.h   5
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h   32
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c   2
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c   2
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.h   3
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c   10
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c   8
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c   909
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c   3
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c   9
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c   19
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h   73
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c   139
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c   231
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c   292
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h   1
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c   288
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h   5
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_msg.h   17
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   23
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/resource_tracker.c   3
-rw-r--r--  include/linux/mlx4/device.h   5
-rw-r--r--  include/rdma/ib_verbs.h   128
-rw-r--r--  include/rdma/iw_cm.h   8
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h   99
44 files changed, 3536 insertions, 1024 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 7c0f9535fb7d..3a2c3c3bf723 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1385,8 +1385,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
struct rdma_cm_event event;
- struct sockaddr_in *sin;
int ret = 0;
+ struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
@@ -1397,10 +1398,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
case IW_CM_EVENT_CONNECT_REPLY:
- sin = (struct sockaddr_in *) cma_src_addr(id_priv);
- *sin = iw_event->local_addr;
- sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
- *sin = iw_event->remote_addr;
+ memcpy(cma_src_addr(id_priv), laddr,
+ rdma_addr_size(laddr));
+ memcpy(cma_dst_addr(id_priv), raddr,
+ rdma_addr_size(raddr));
switch (iw_event->status) {
case 0:
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -1450,11 +1451,12 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct sockaddr_in *sin;
struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
+ struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
listen_id = cm_id->context;
if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
@@ -1472,14 +1474,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
- if (!dev) {
- ret = -EADDRNOTAVAIL;
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
- }
- ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1497,10 +1492,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
cm_id->context = conn_id;
cm_id->cm_handler = cma_iw_handler;
- sin = (struct sockaddr_in *) cma_src_addr(conn_id);
- *sin = iw_event->local_addr;
- sin = (struct sockaddr_in *) cma_dst_addr(conn_id);
- *sin = iw_event->remote_addr;
+ memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
+ memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
ret = ib_query_device(conn_id->id.device, &attr);
if (ret) {
@@ -1576,7 +1569,6 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
int ret;
- struct sockaddr_in *sin;
struct iw_cm_id *id;
id = iw_create_cm_id(id_priv->id.device,
@@ -1587,8 +1579,8 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
id_priv->cm_id.iw = id;
- sin = (struct sockaddr_in *) cma_src_addr(id_priv);
- id_priv->cm_id.iw->local_addr = *sin;
+ memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
@@ -2803,7 +2795,6 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct iw_cm_id *cm_id;
- struct sockaddr_in* sin;
int ret;
struct iw_cm_conn_param iw_param;
@@ -2813,11 +2804,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
id_priv->cm_id.iw = cm_id;
- sin = (struct sockaddr_in *) cma_src_addr(id_priv);
- cm_id->local_addr = *sin;
-
- sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
- cm_id->remote_addr = *sin;
+ memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
+ memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
+ rdma_addr_size(cma_dst_addr(id_priv)));
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 0fcd7aa26fa2..d040b877475f 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -135,6 +135,7 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
+ struct ib_uxrcd_object *uxrcd;
};
struct ib_ucq_object {
@@ -155,6 +156,7 @@ extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
+extern struct idr ib_uverbs_rule_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
@@ -215,5 +217,7 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
+IB_UVERBS_DECLARE_CMD(create_flow);
+IB_UVERBS_DECLARE_CMD(destroy_flow);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b3c07b0c9f26..f2b81b9ee0d6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -54,6 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
+static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
@@ -330,6 +331,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
+ INIT_LIST_HEAD(&ucontext->rule_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -1526,7 +1528,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -1642,8 +1644,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
goto err_copy;
}
- if (xrcd)
+ if (xrcd) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
put_xrcd_read(xrcd_uobj);
+ }
+
if (pd)
put_pd_read(pd);
if (scq)
@@ -1753,6 +1760,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
goto err_remove;
}
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
put_xrcd_read(xrcd_uobj);
mutex_lock(&file->mutex);
@@ -2019,6 +2028,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (ret)
return ret;
+ if (obj->uxrcd)
+ atomic_dec(&obj->uxrcd->refcnt);
+
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
mutex_lock(&file->mutex);
@@ -2587,6 +2599,232 @@ out_put:
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+ if (ib_spec->eth.size != kern_spec->eth.size)
+ return -EINVAL;
+ memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+ sizeof(struct ib_flow_eth_filter));
+ memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+ sizeof(struct ib_flow_eth_filter));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+ if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+ return -EINVAL;
+ memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+ sizeof(struct ib_flow_ipv4_filter));
+ memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+ sizeof(struct ib_flow_ipv4_filter));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+ if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+ return -EINVAL;
+ memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_flow cmd;
+ struct ib_uverbs_create_flow_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_flow *flow_id;
+ struct ib_kern_flow_attr *kern_flow_attr;
+ struct ib_flow_attr *flow_attr;
+ struct ib_qp *qp;
+ int err = 0;
+ void *kern_spec;
+ void *ib_spec;
+ int i;
+ int kern_attr_size;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
+ !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+ return -EPERM;
+
+ if (cmd.flow_attr.num_of_specs < 0 ||
+ cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ return -EINVAL;
+
+ kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
+ sizeof(struct ib_uverbs_cmd_hdr_ex);
+
+ if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
+ kern_attr_size < 0 || kern_attr_size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+ return -EINVAL;
+
+ if (cmd.flow_attr.num_of_specs) {
+ kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ if (!kern_flow_attr)
+ return -ENOMEM;
+
+ memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
+ if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
+ kern_attr_size)) {
+ err = -EFAULT;
+ goto err_free_attr;
+ }
+ } else {
+ kern_flow_attr = &cmd.flow_attr;
+ kern_attr_size = sizeof(cmd.flow_attr);
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ err = -ENOMEM;
+ goto err_free_attr;
+ }
+ init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+ down_write(&uobj->mutex);
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
+ flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ if (!flow_attr) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ flow_attr->type = kern_flow_attr->type;
+ flow_attr->priority = kern_flow_attr->priority;
+ flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
+ flow_attr->port = kern_flow_attr->port;
+ flow_attr->flags = kern_flow_attr->flags;
+ flow_attr->size = sizeof(*flow_attr);
+
+ kern_spec = kern_flow_attr + 1;
+ ib_spec = flow_attr + 1;
+ for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+ err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ if (err)
+ goto err_free;
+ flow_attr->size +=
+ ((union ib_flow_spec *) ib_spec)->size;
+ kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
+ kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+ ib_spec += ((union ib_flow_spec *) ib_spec)->size;
+ }
+ if (kern_attr_size) {
+ pr_warn("create flow failed, %d bytes left from uverb cmd\n",
+ kern_attr_size);
+ goto err_free;
+ }
+ flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+ if (IS_ERR(flow_id)) {
+ err = PTR_ERR(flow_id);
+ goto err_free;
+ }
+ flow_id->qp = qp;
+ flow_id->uobject = uobj;
+ uobj->object = flow_id;
+
+ err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+ if (err)
+ goto destroy_flow;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.flow_handle = uobj->id;
+
+ if (copy_to_user((void __user *)(unsigned long) cmd.response,
+ &resp, sizeof(resp))) {
+ err = -EFAULT;
+ goto err_copy;
+ }
+
+ put_qp_read(qp);
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rule_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+ kfree(flow_attr);
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return in_len;
+err_copy:
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+destroy_flow:
+ ib_destroy_flow(flow_id);
+err_free:
+ kfree(flow_attr);
+err_put:
+ put_qp_read(qp);
+err_uobj:
+ put_uobj_write(uobj);
+err_free_attr:
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return err;
+}
+
+ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len) {
+ struct ib_uverbs_destroy_flow cmd;
+ struct ib_flow *flow_id;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ flow_id = uobj->object;
+
+ ret = ib_destroy_flow(flow_id);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return ret ? ret : in_len;
+}
+
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
@@ -2860,6 +3098,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
+ struct ib_usrq_object *us;
+ enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2869,6 +3109,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
return -EINVAL;
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
+ srq_type = srq->srq_type;
ret = ib_destroy_srq(srq);
if (!ret)
@@ -2879,6 +3120,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ if (srq_type == IB_SRQT_XRC) {
+ us = container_of(obj, struct ib_usrq_object, uevent);
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
mutex_lock(&file->mutex);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2c6f0f2ecd9d..75ad86c4abf8 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
+DEFINE_IDR(ib_uverbs_rule_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -113,7 +114,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp
+ [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
+ [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow,
+ [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -212,6 +215,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uobj);
}
+ list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
+ struct ib_flow *flow_id = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+ ib_destroy_flow(flow_id);
+ kfree(uobj);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = uobj->object;
struct ib_uqp_object *uqp =
@@ -583,9 +594,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
- if (hdr.in_words * 4 != count)
- return -EINVAL;
-
if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[hdr.command])
return -EINVAL;
@@ -597,8 +605,30 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -ENOSYS;
- return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
- hdr.in_words * 4, hdr.out_words * 4);
+ if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
+ struct ib_uverbs_cmd_hdr_ex hdr_ex;
+
+ if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
+ return -EFAULT;
+
+ if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file,
+ buf + sizeof(hdr_ex),
+ (hdr_ex.in_words +
+ hdr_ex.provider_in_words) * 4,
+ (hdr_ex.out_words +
+ hdr_ex.provider_out_words) * 4);
+ } else {
+ if (hdr.in_words * 4 != count)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file,
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+ }
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 22192deb8828..a321df28bab2 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -346,10 +346,13 @@ EXPORT_SYMBOL(ib_destroy_srq);
static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
struct ib_qp *qp = context;
+ unsigned long flags;
+ spin_lock_irqsave(&qp->device->event_handler_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
event->element.qp->event_handler(event, event->element.qp->qp_context);
+ spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
}
static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
@@ -1254,3 +1257,30 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct ib_flow *flow_id;
+ if (!qp->device->create_flow)
+ return ERR_PTR(-ENOSYS);
+
+ flow_id = qp->device->create_flow(qp, flow_attr, domain);
+ if (!IS_ERR(flow_id))
+ atomic_inc(&qp->usecnt);
+ return flow_id;
+}
+EXPORT_SYMBOL(ib_create_flow);
+
+int ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err;
+ struct ib_qp *qp = flow_id->qp;
+
+ err = qp->device->destroy_flow(flow_id);
+ if (!err)
+ atomic_dec(&qp->usecnt);
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_flow);
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 706cf97cbe8f..d5d1929753e4 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -155,6 +155,8 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
enum c2_event_id event_id;
unsigned long flags;
int status;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
/*
* retrieve the message
@@ -206,10 +208,10 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
case CCAE_ACTIVE_CONNECT_RESULTS:
res = &wr->ae.ae_active_connect_results;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.local_addr.sin_addr.s_addr = res->laddr;
- cm_event.remote_addr.sin_addr.s_addr = res->raddr;
- cm_event.local_addr.sin_port = res->lport;
- cm_event.remote_addr.sin_port = res->rport;
+ laddr->sin_addr.s_addr = res->laddr;
+ raddr->sin_addr.s_addr = res->raddr;
+ laddr->sin_port = res->lport;
+ raddr->sin_port = res->rport;
if (status == 0) {
cm_event.private_data_len =
be32_to_cpu(res->private_data_length);
@@ -281,10 +283,10 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
}
cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
- cm_event.local_addr.sin_addr.s_addr = req->laddr;
- cm_event.remote_addr.sin_addr.s_addr = req->raddr;
- cm_event.local_addr.sin_port = req->lport;
- cm_event.remote_addr.sin_port = req->rport;
+ laddr->sin_addr.s_addr = req->laddr;
+ raddr->sin_addr.s_addr = req->raddr;
+ laddr->sin_port = req->lport;
+ raddr->sin_port = req->rport;
cm_event.private_data_len =
be32_to_cpu(req->private_data_length);
cm_event.private_data = req->private_data;
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
index 95f58ab1e0b8..23bfa94fbd4e 100644
--- a/drivers/infiniband/hw/amso1100/c2_cm.c
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -46,6 +46,10 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
struct c2_vq_req *vq_req;
int err;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+ if (cm_id->remote_addr.ss_family != AF_INET)
+ return -ENOSYS;
ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
if (!ibqp)
@@ -91,8 +95,8 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
wr->rnic_handle = c2dev->adapter_handle;
wr->qp_handle = qp->adapter_handle;
- wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
- wr->remote_port = cm_id->remote_addr.sin_port;
+ wr->remote_addr = raddr->sin_addr.s_addr;
+ wr->remote_port = raddr->sin_port;
/*
* Move any private data from the callers's buf into
@@ -135,6 +139,10 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
struct c2wr_ep_listen_create_rep *reply;
struct c2_vq_req *vq_req;
int err;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+
+ if (cm_id->local_addr.ss_family != AF_INET)
+ return -ENOSYS;
c2dev = to_c2dev(cm_id->device);
if (c2dev == NULL)
@@ -153,8 +161,8 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
wr.hdr.context = (u64) (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
- wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
- wr.local_port = cm_id->local_addr.sin_port;
+ wr.local_addr = laddr->sin_addr.s_addr;
+ wr.local_port = laddr->sin_port;
wr.backlog = cpu_to_be32(backlog);
wr.user_context = (u64) (unsigned long) cm_id;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3e094cd6a0e3..095bb046e2c8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -721,8 +721,10 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
- event.local_addr = ep->com.local_addr;
- event.remote_addr = ep->com.remote_addr;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
if ((status == 0) || (status == -ECONNREFUSED)) {
event.private_data_len = ep->plen;
@@ -747,8 +749,10 @@ static void connect_request_upcall(struct iwch_ep *ep)
PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
- event.local_addr = ep->com.local_addr;
- event.remote_addr = ep->com.remote_addr;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
event.provider_data = ep;
@@ -1872,8 +1876,9 @@ err:
static int is_loopback_dst(struct iw_cm_id *cm_id)
{
struct net_device *dev;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
- dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr);
+ dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
if (!dev)
return 0;
dev_put(dev);
@@ -1886,6 +1891,13 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_ep *ep;
struct rtable *rt;
int err = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+ if (cm_id->remote_addr.ss_family != PF_INET) {
+ err = -ENOSYS;
+ goto out;
+ }
if (is_loopback_dst(cm_id)) {
err = -ENOSYS;
@@ -1929,11 +1941,9 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- rt = find_route(h->rdev.t3cdev_p,
- cm_id->local_addr.sin_addr.s_addr,
- cm_id->remote_addr.sin_addr.s_addr,
- cm_id->local_addr.sin_port,
- cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+ rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr, laddr->sin_port,
+ raddr->sin_port, IPTOS_LOWDELAY);
if (!rt) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
@@ -1941,7 +1951,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
ep->dst = &rt->dst;
ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
- &cm_id->remote_addr.sin_addr.s_addr);
+ &raddr->sin_addr.s_addr);
if (!ep->l2t) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
err = -ENOMEM;
@@ -1950,8 +1960,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
state_set(&ep->com, CONNECTING);
ep->tos = IPTOS_LOWDELAY;
- ep->com.local_addr = cm_id->local_addr;
- ep->com.remote_addr = cm_id->remote_addr;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.remote_addr));
/* send connect request to rnic */
err = send_connect(ep);
@@ -1979,6 +1991,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
might_sleep();
+ if (cm_id->local_addr.ss_family != PF_INET) {
+ err = -ENOSYS;
+ goto fail1;
+ }
+
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
@@ -1990,7 +2007,8 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
ep->backlog = backlog;
- ep->com.local_addr = cm_id->local_addr;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
/*
* Allocate a server TID.
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index 6b7e6c543534..d4e8983fba53 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_CXGB4
tristate "Chelsio T4 RDMA Driver"
- depends on CHELSIO_T4 && INET
+ depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
select GENERIC_ALLOCATOR
---help---
This is an iWARP/RDMA driver for the Chelsio T4 1GbE and
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 65c30ea8c1a1..12fef76c791c 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -44,6 +44,8 @@
#include <net/netevent.h>
#include <net/route.h>
#include <net/tcp.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
#include "iw_cxgb4.h"
@@ -330,22 +332,80 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
} else {
skb = alloc_skb(len, gfp);
}
+ t4_set_arp_err_handler(skb, NULL, NULL);
return skb;
}
-static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip,
+static struct net_device *get_real_dev(struct net_device *egress_dev)
+{
+ struct net_device *phys_dev = egress_dev;
+ if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
+ phys_dev = vlan_dev_real_dev(egress_dev);
+ return phys_dev;
+}
+
+static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
+{
+ int i;
+
+ egress_dev = get_real_dev(egress_dev);
+ for (i = 0; i < dev->rdev.lldi.nports; i++)
+ if (dev->rdev.lldi.ports[i] == egress_dev)
+ return 1;
+ return 0;
+}
+
+static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
+ __u8 *peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos,
+ __u32 sin6_scope_id)
+{
+ struct dst_entry *dst = NULL;
+
+ if (IS_ENABLED(CONFIG_IPV6)) {
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ memcpy(&fl6.daddr, peer_ip, 16);
+ memcpy(&fl6.saddr, local_ip, 16);
+ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = sin6_scope_id;
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ if (!dst)
+ goto out;
+ if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
+ !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+ dst_release(dst);
+ dst = NULL;
+ }
+ }
+
+out:
+ return dst;
+}
+
+static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
__be32 peer_ip, __be16 local_port,
__be16 peer_port, u8 tos)
{
struct rtable *rt;
struct flowi4 fl4;
+ struct neighbour *n;
rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
peer_port, local_port, IPPROTO_TCP,
tos, 0);
if (IS_ERR(rt))
return NULL;
- return rt;
+ n = dst_neigh_lookup(&rt->dst, &peer_ip);
+ if (!n)
+ return NULL;
+ if (!our_interface(dev, n->dev)) {
+ dst_release(&rt->dst);
+ return NULL;
+ }
+ neigh_release(n);
+ return &rt->dst;
}
static void arp_failure_discard(void *handle, struct sk_buff *skb)
@@ -487,7 +547,7 @@ static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst,
ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
else {
ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC;
- ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
+ ntuple |= 1 << FILTER_SEL_WIDTH_TAG_P_FC;
}
ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
FILTER_SEL_WIDTH_VLD_TAG_P_FC;
@@ -512,15 +572,28 @@ static int send_connect(struct c4iw_ep *ep)
{
struct cpl_act_open_req *req;
struct cpl_t5_act_open_req *t5_req;
+ struct cpl_act_open_req6 *req6;
+ struct cpl_t5_act_open_req6 *t5_req6;
struct sk_buff *skb;
u64 opt0;
u32 opt2;
unsigned int mtu_idx;
int wscale;
- int size = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
- sizeof(struct cpl_act_open_req) :
- sizeof(struct cpl_t5_act_open_req);
- int wrlen = roundup(size, 16);
+ int wrlen;
+ int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
+ sizeof(struct cpl_act_open_req) :
+ sizeof(struct cpl_t5_act_open_req);
+ int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
+ sizeof(struct cpl_act_open_req6) :
+ sizeof(struct cpl_t5_act_open_req6);
+ struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr;
+ struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr;
+ struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+
+ wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
+ roundup(sizev4, 16) :
+ roundup(sizev6, 16);
PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
@@ -557,33 +630,82 @@ static int send_connect(struct c4iw_ep *ep)
t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
- INIT_TP_WR(req, 0);
- OPCODE_TID(req) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
- ((ep->rss_qid << 14) | ep->atid)));
- req->local_port = ep->com.local_addr.sin_port;
- req->peer_port = ep->com.remote_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
- req->opt0 = cpu_to_be64(opt0);
- req->params = cpu_to_be32(select_ntuple(ep->com.dev,
- ep->dst, ep->l2t));
- req->opt2 = cpu_to_be32(opt2);
+ if (ep->com.remote_addr.ss_family == AF_INET) {
+ req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ ((ep->rss_qid << 14) | ep->atid)));
+ req->local_port = la->sin_port;
+ req->peer_port = ra->sin_port;
+ req->local_ip = la->sin_addr.s_addr;
+ req->peer_ip = ra->sin_addr.s_addr;
+ req->opt0 = cpu_to_be64(opt0);
+ req->params = cpu_to_be32(select_ntuple(ep->com.dev,
+ ep->dst, ep->l2t));
+ req->opt2 = cpu_to_be32(opt2);
+ } else {
+ req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
+
+ INIT_TP_WR(req6, 0);
+ OPCODE_TID(req6) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ ((ep->rss_qid<<14)|ep->atid)));
+ req6->local_port = la6->sin6_port;
+ req6->peer_port = ra6->sin6_port;
+ req6->local_ip_hi = *((__be64 *)
+ (la6->sin6_addr.s6_addr));
+ req6->local_ip_lo = *((__be64 *)
+ (la6->sin6_addr.s6_addr + 8));
+ req6->peer_ip_hi = *((__be64 *)
+ (ra6->sin6_addr.s6_addr));
+ req6->peer_ip_lo = *((__be64 *)
+ (ra6->sin6_addr.s6_addr + 8));
+ req6->opt0 = cpu_to_be64(opt0);
+ req6->params = cpu_to_be32(
+ select_ntuple(ep->com.dev, ep->dst,
+ ep->l2t));
+ req6->opt2 = cpu_to_be32(opt2);
+ }
} else {
- t5_req = (struct cpl_t5_act_open_req *) skb_put(skb, wrlen);
- INIT_TP_WR(t5_req, 0);
- OPCODE_TID(t5_req) = cpu_to_be32(
+ if (ep->com.remote_addr.ss_family == AF_INET) {
+ t5_req = (struct cpl_t5_act_open_req *)
+ skb_put(skb, wrlen);
+ INIT_TP_WR(t5_req, 0);
+ OPCODE_TID(t5_req) = cpu_to_be32(
MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
((ep->rss_qid << 14) | ep->atid)));
- t5_req->local_port = ep->com.local_addr.sin_port;
- t5_req->peer_port = ep->com.remote_addr.sin_port;
- t5_req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- t5_req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
- t5_req->opt0 = cpu_to_be64(opt0);
- t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
- select_ntuple(ep->com.dev, ep->dst, ep->l2t)));
- t5_req->opt2 = cpu_to_be32(opt2);
+ t5_req->local_port = la->sin_port;
+ t5_req->peer_port = ra->sin_port;
+ t5_req->local_ip = la->sin_addr.s_addr;
+ t5_req->peer_ip = ra->sin_addr.s_addr;
+ t5_req->opt0 = cpu_to_be64(opt0);
+ t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
+ select_ntuple(ep->com.dev,
+ ep->dst, ep->l2t)));
+ t5_req->opt2 = cpu_to_be32(opt2);
+ } else {
+ t5_req6 = (struct cpl_t5_act_open_req6 *)
+ skb_put(skb, wrlen);
+ INIT_TP_WR(t5_req6, 0);
+ OPCODE_TID(t5_req6) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ ((ep->rss_qid<<14)|ep->atid)));
+ t5_req6->local_port = la6->sin6_port;
+ t5_req6->peer_port = ra6->sin6_port;
+ t5_req6->local_ip_hi = *((__be64 *)
+ (la6->sin6_addr.s6_addr));
+ t5_req6->local_ip_lo = *((__be64 *)
+ (la6->sin6_addr.s6_addr + 8));
+ t5_req6->peer_ip_hi = *((__be64 *)
+ (ra6->sin6_addr.s6_addr));
+ t5_req6->peer_ip_lo = *((__be64 *)
+ (ra6->sin6_addr.s6_addr + 8));
+ t5_req6->opt0 = cpu_to_be64(opt0);
+ t5_req6->params = (__force __be64)cpu_to_be32(
+ select_ntuple(ep->com.dev, ep->dst, ep->l2t));
+ t5_req6->opt2 = cpu_to_be32(opt2);
+ }
}
set_bit(ACT_OPEN_REQ, &ep->com.history);
@@ -952,8 +1074,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
- event.local_addr = ep->com.local_addr;
- event.remote_addr = ep->com.remote_addr;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
if ((status == 0) || (status == -ECONNREFUSED)) {
if (!ep->tried_with_mpa_v1) {
@@ -989,8 +1113,10 @@ static void connect_request_upcall(struct c4iw_ep *ep)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
- event.local_addr = ep->com.local_addr;
- event.remote_addr = ep->com.remote_addr;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
event.provider_data = ep;
if (!ep->tried_with_mpa_v1) {
/* this means MPA_v2 is used */
@@ -1447,10 +1573,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
" qpid %u ep %p state %d tid %u status %d\n",
__func__, ep->com.qp->wq.sq.qid, ep,
state_read(&ep->com), ep->hwtid, status);
- attrs.next_state = C4IW_QP_STATE_ERROR;
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
- c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
break;
}
default:
@@ -1498,6 +1623,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
struct fw_ofld_connection_wr *req;
unsigned int mtu_idx;
int wscale;
+ struct sockaddr_in *sin;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
@@ -1506,10 +1632,12 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst,
ep->l2t));
- req->le.lport = ep->com.local_addr.sin_port;
- req->le.pport = ep->com.remote_addr.sin_port;
- req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr;
- req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr;
+ sin = (struct sockaddr_in *)&ep->com.local_addr;
+ req->le.lport = sin->sin_port;
+ req->le.u.ipv4.lip = sin->sin_addr.s_addr;
+ sin = (struct sockaddr_in *)&ep->com.remote_addr;
+ req->le.pport = sin->sin_port;
+ req->le.u.ipv4.pip = sin->sin_addr.s_addr;
req->tcb.t_state_to_astid =
htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) |
V_FW_OFLD_CONNECTION_WR_ASTID(atid));
@@ -1560,14 +1688,98 @@ static inline int act_open_has_tid(int status)
#define ACT_OPEN_RETRY_COUNT 2
+static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
+ struct dst_entry *dst, struct c4iw_dev *cdev,
+ bool clear_mpa_v1)
+{
+ struct neighbour *n;
+ int err, step;
+ struct net_device *pdev;
+
+ n = dst_neigh_lookup(dst, peer_ip);
+ if (!n)
+ return -ENODEV;
+
+ rcu_read_lock();
+ err = -ENOMEM;
+ if (n->dev->flags & IFF_LOOPBACK) {
+ if (iptype == 4)
+ pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
+ else if (IS_ENABLED(CONFIG_IPV6))
+ for_each_netdev(&init_net, pdev) {
+ if (ipv6_chk_addr(&init_net,
+ (struct in6_addr *)peer_ip,
+ pdev, 1))
+ break;
+ }
+ else
+ pdev = NULL;
+
+ if (!pdev) {
+ err = -ENODEV;
+ goto out;
+ }
+ ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
+ n, pdev, 0);
+ if (!ep->l2t)
+ goto out;
+ ep->mtu = pdev->mtu;
+ ep->tx_chan = cxgb4_port_chan(pdev);
+ ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ step = cdev->rdev.lldi.ntxq /
+ cdev->rdev.lldi.nchan;
+ ep->txq_idx = cxgb4_port_idx(pdev) * step;
+ step = cdev->rdev.lldi.nrxq /
+ cdev->rdev.lldi.nchan;
+ ep->ctrlq_idx = cxgb4_port_idx(pdev);
+ ep->rss_qid = cdev->rdev.lldi.rxq_ids[
+ cxgb4_port_idx(pdev) * step];
+ dev_put(pdev);
+ } else {
+ pdev = get_real_dev(n->dev);
+ ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
+ n, pdev, 0);
+ if (!ep->l2t)
+ goto out;
+ ep->mtu = dst_mtu(dst);
+ ep->tx_chan = cxgb4_port_chan(n->dev);
+ ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+ step = cdev->rdev.lldi.ntxq /
+ cdev->rdev.lldi.nchan;
+ ep->txq_idx = cxgb4_port_idx(n->dev) * step;
+ ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+ step = cdev->rdev.lldi.nrxq /
+ cdev->rdev.lldi.nchan;
+ ep->rss_qid = cdev->rdev.lldi.rxq_ids[
+ cxgb4_port_idx(n->dev) * step];
+
+ if (clear_mpa_v1) {
+ ep->retry_with_mpa_v1 = 0;
+ ep->tried_with_mpa_v1 = 0;
+ }
+ }
+ err = 0;
+out:
+ rcu_read_unlock();
+
+ neigh_release(n);
+
+ return err;
+}
+
static int c4iw_reconnect(struct c4iw_ep *ep)
{
int err = 0;
- struct rtable *rt;
- struct port_info *pi;
- struct net_device *pdev;
- int step;
- struct neighbour *neigh;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)
+ &ep->com.cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)
+ &ep->com.cm_id->remote_addr;
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
+ &ep->com.cm_id->local_addr;
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
+ &ep->com.cm_id->remote_addr;
+ int iptype;
+ __u8 *ra;
PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
init_timer(&ep->timer);
@@ -1584,57 +1796,28 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
/* find a route */
- rt = find_route(ep->com.dev,
- ep->com.cm_id->local_addr.sin_addr.s_addr,
- ep->com.cm_id->remote_addr.sin_addr.s_addr,
- ep->com.cm_id->local_addr.sin_port,
- ep->com.cm_id->remote_addr.sin_port, 0);
- if (!rt) {
+ if (ep->com.cm_id->local_addr.ss_family == AF_INET) {
+ ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr, laddr->sin_port,
+ raddr->sin_port, 0);
+ iptype = 4;
+ ra = (__u8 *)&raddr->sin_addr;
+ } else {
+ ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port, raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
+ iptype = 6;
+ ra = (__u8 *)&raddr6->sin6_addr;
+ }
+ if (!ep->dst) {
pr_err("%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
- ep->dst = &rt->dst;
-
- neigh = dst_neigh_lookup(ep->dst,
- &ep->com.cm_id->remote_addr.sin_addr.s_addr);
- if (!neigh) {
- pr_err("%s - cannot alloc neigh.\n", __func__);
- err = -ENOMEM;
- goto fail4;
- }
-
- /* get a l2t entry */
- if (neigh->dev->flags & IFF_LOOPBACK) {
- PDBG("%s LOOPBACK\n", __func__);
- pdev = ip_dev_find(&init_net,
- ep->com.cm_id->remote_addr.sin_addr.s_addr);
- ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
- neigh, pdev, 0);
- pi = (struct port_info *)netdev_priv(pdev);
- ep->mtu = pdev->mtu;
- ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
- dev_put(pdev);
- } else {
- ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
- neigh, neigh->dev, 0);
- pi = (struct port_info *)netdev_priv(neigh->dev);
- ep->mtu = dst_mtu(ep->dst);
- ep->tx_chan = cxgb4_port_chan(neigh->dev);
- ep->smac_idx = (cxgb4_port_viid(neigh->dev) &
- 0x7F) << 1;
- }
-
- step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan;
- ep->txq_idx = pi->port_id * step;
- ep->ctrlq_idx = pi->port_id;
- step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan;
- ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step];
-
- if (!ep->l2t) {
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
+ if (err) {
pr_err("%s - cannot alloc l2e.\n", __func__);
- err = -ENOMEM;
goto fail4;
}
@@ -1677,8 +1860,16 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
ntohl(rpl->atid_status)));
struct tid_info *t = dev->rdev.lldi.tids;
int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status));
+ struct sockaddr_in *la;
+ struct sockaddr_in *ra;
+ struct sockaddr_in6 *la6;
+ struct sockaddr_in6 *ra6;
ep = lookup_atid(t, atid);
+ la = (struct sockaddr_in *)&ep->com.local_addr;
+ ra = (struct sockaddr_in *)&ep->com.remote_addr;
+ la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
status, status2errno(status));
@@ -1699,10 +1890,11 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
case CPL_ERR_CONN_TIMEDOUT:
break;
case CPL_ERR_TCAM_FULL:
+ mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.tcam_full++;
- if (dev->rdev.lldi.enable_fw_ofld_conn) {
- mutex_lock(&dev->rdev.stats.lock);
- mutex_unlock(&dev->rdev.stats.lock);
+ mutex_unlock(&dev->rdev.stats.lock);
+ if (ep->com.local_addr.ss_family == AF_INET &&
+ dev->rdev.lldi.enable_fw_ofld_conn) {
send_fw_act_open_req(ep,
GET_TID_TID(GET_AOPEN_ATID(
ntohl(rpl->atid_status))));
@@ -1722,13 +1914,17 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
}
break;
default:
- printk(KERN_INFO MOD "Active open failure - "
- "atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
- atid, status, status2errno(status),
- &ep->com.local_addr.sin_addr.s_addr,
- ntohs(ep->com.local_addr.sin_port),
- &ep->com.remote_addr.sin_addr.s_addr,
- ntohs(ep->com.remote_addr.sin_port));
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
+ atid, status, status2errno(status),
+ &la->sin_addr.s_addr, ntohs(la->sin_port),
+ &ra->sin_addr.s_addr, ntohs(ra->sin_port));
+ } else {
+ pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
+ atid, status, status2errno(status),
+ la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
+ ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
+ }
break;
}
@@ -1766,27 +1962,6 @@ out:
return 0;
}
-static int listen_stop(struct c4iw_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_close_listsvr_req *req;
-
- PDBG("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req));
- INIT_TP_WR(req, 0);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
- ep->stid));
- req->reply_ctrl = cpu_to_be16(
- QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0]));
- set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
- return c4iw_ofld_send(&ep->com.dev->rdev, skb);
-}
-
static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
@@ -1799,7 +1974,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
-static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
+static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
struct cpl_pass_accept_req *req)
{
struct cpl_pass_accept_rpl *rpl;
@@ -1851,16 +2026,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
rpl->opt0 = cpu_to_be64(opt0);
rpl->opt2 = cpu_to_be32(opt2);
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
+ t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
return;
}
-static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
- struct sk_buff *skb)
+static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid,
- peer_ip);
+ PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
skb_get(skb);
@@ -1868,95 +2042,38 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
return;
}
-static void get_4tuple(struct cpl_pass_accept_req *req,
- __be32 *local_ip, __be32 *peer_ip,
+static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
+ __u8 *local_ip, __u8 *peer_ip,
__be16 *local_port, __be16 *peer_port)
{
int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len));
int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len));
struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
+ struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
struct tcphdr *tcp = (struct tcphdr *)
((u8 *)(req + 1) + eth_len + ip_len);
- PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
- ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
- ntohs(tcp->dest));
-
- *peer_ip = ip->saddr;
- *local_ip = ip->daddr;
+ if (ip->version == 4) {
+ PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
+ ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
+ ntohs(tcp->dest));
+ *iptype = 4;
+ memcpy(peer_ip, &ip->saddr, 4);
+ memcpy(local_ip, &ip->daddr, 4);
+ } else {
+ PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
+ ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
+ ntohs(tcp->dest));
+ *iptype = 6;
+ memcpy(peer_ip, ip6->saddr.s6_addr, 16);
+ memcpy(local_ip, ip6->daddr.s6_addr, 16);
+ }
*peer_port = tcp->source;
*local_port = tcp->dest;
return;
}
-static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
- struct c4iw_dev *cdev, bool clear_mpa_v1)
-{
- struct neighbour *n;
- int err, step;
-
- n = dst_neigh_lookup(dst, &peer_ip);
- if (!n)
- return -ENODEV;
-
- rcu_read_lock();
- err = -ENOMEM;
- if (n->dev->flags & IFF_LOOPBACK) {
- struct net_device *pdev;
-
- pdev = ip_dev_find(&init_net, peer_ip);
- if (!pdev) {
- err = -ENODEV;
- goto out;
- }
- ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
- n, pdev, 0);
- if (!ep->l2t)
- goto out;
- ep->mtu = pdev->mtu;
- ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
- step = cdev->rdev.lldi.ntxq /
- cdev->rdev.lldi.nchan;
- ep->txq_idx = cxgb4_port_idx(pdev) * step;
- step = cdev->rdev.lldi.nrxq /
- cdev->rdev.lldi.nchan;
- ep->ctrlq_idx = cxgb4_port_idx(pdev);
- ep->rss_qid = cdev->rdev.lldi.rxq_ids[
- cxgb4_port_idx(pdev) * step];
- dev_put(pdev);
- } else {
- ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
- n, n->dev, 0);
- if (!ep->l2t)
- goto out;
- ep->mtu = dst_mtu(dst);
- ep->tx_chan = cxgb4_port_chan(n->dev);
- ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
- step = cdev->rdev.lldi.ntxq /
- cdev->rdev.lldi.nchan;
- ep->txq_idx = cxgb4_port_idx(n->dev) * step;
- ep->ctrlq_idx = cxgb4_port_idx(n->dev);
- step = cdev->rdev.lldi.nrxq /
- cdev->rdev.lldi.nchan;
- ep->rss_qid = cdev->rdev.lldi.rxq_ids[
- cxgb4_port_idx(n->dev) * step];
-
- if (clear_mpa_v1) {
- ep->retry_with_mpa_v1 = 0;
- ep->tried_with_mpa_v1 = 0;
- }
- }
- err = 0;
-out:
- rcu_read_unlock();
-
- neigh_release(n);
-
- return err;
-}
-
static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct c4iw_ep *child_ep = NULL, *parent_ep;
@@ -1965,23 +2082,17 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
struct tid_info *t = dev->rdev.lldi.tids;
unsigned int hwtid = GET_TID(req);
struct dst_entry *dst;
- struct rtable *rt;
- __be32 local_ip, peer_ip = 0;
+ __u8 local_ip[16], peer_ip[16];
__be16 local_port, peer_port;
int err;
u16 peer_mss = ntohs(req->tcpopt.mss);
+ int iptype;
parent_ep = lookup_stid(t, stid);
if (!parent_ep) {
PDBG("%s connect request on invalid stid %d\n", __func__, stid);
goto reject;
}
- get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);
-
- PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \
- "rport %d peer_mss %d\n", __func__, parent_ep, hwtid,
- ntohl(local_ip), ntohl(peer_ip), ntohs(local_port),
- ntohs(peer_port), peer_mss);
if (state_read(&parent_ep->com) != LISTEN) {
printk(KERN_ERR "%s - listening ep not in LISTEN\n",
@@ -1989,15 +2100,32 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
+ get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
+
/* Find output route */
- rt = find_route(dev, local_ip, peer_ip, local_port, peer_port,
- GET_POPEN_TOS(ntohl(req->tos_stid)));
- if (!rt) {
+ if (iptype == 4) {
+ PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
+ dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
+ local_port, peer_port,
+ GET_POPEN_TOS(ntohl(req->tos_stid)));
+ } else {
+ PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
+ dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
+ PASS_OPEN_TOS(ntohl(req->tos_stid)),
+ ((struct sockaddr_in6 *)
+ &parent_ep->com.local_addr)->sin6_scope_id);
+ }
+ if (!dst) {
printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
__func__);
goto reject;
}
- dst = &rt->dst;
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
@@ -2007,7 +2135,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- err = import_ep(child_ep, peer_ip, dst, dev, false);
+ err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
if (err) {
printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
__func__);
@@ -2022,12 +2150,27 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
child_ep->com.cm_id = NULL;
- child_ep->com.local_addr.sin_family = PF_INET;
- child_ep->com.local_addr.sin_port = local_port;
- child_ep->com.local_addr.sin_addr.s_addr = local_ip;
- child_ep->com.remote_addr.sin_family = PF_INET;
- child_ep->com.remote_addr.sin_port = peer_port;
- child_ep->com.remote_addr.sin_addr.s_addr = peer_ip;
+ if (iptype == 4) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)
+ &child_ep->com.local_addr;
+ sin->sin_family = PF_INET;
+ sin->sin_port = local_port;
+ sin->sin_addr.s_addr = *(__be32 *)local_ip;
+ sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
+ sin->sin_family = PF_INET;
+ sin->sin_port = peer_port;
+ sin->sin_addr.s_addr = *(__be32 *)peer_ip;
+ } else {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+ &child_ep->com.local_addr;
+ sin6->sin6_family = PF_INET6;
+ sin6->sin6_port = local_port;
+ memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
+ sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
+ sin6->sin6_family = PF_INET6;
+ sin6->sin6_port = peer_port;
+ memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
+ }
c4iw_get_ep(&parent_ep->com);
child_ep->parent_ep = parent_ep;
child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
@@ -2040,11 +2183,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
init_timer(&child_ep->timer);
cxgb4_insert_tid(t, child_ep, hwtid);
insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
- accept_cr(child_ep, peer_ip, skb, req);
+ accept_cr(child_ep, skb, req);
set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
goto out;
reject:
- reject_cr(dev, hwtid, peer_ip, skb);
+ reject_cr(dev, hwtid, skb);
out:
return 0;
}
@@ -2512,12 +2655,79 @@ err:
return err;
}
+static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
+{
+ struct in_device *ind;
+ int found = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+ ind = in_dev_get(dev->rdev.lldi.ports[0]);
+ if (!ind)
+ return -EADDRNOTAVAIL;
+ for_primary_ifa(ind) {
+ laddr->sin_addr.s_addr = ifa->ifa_address;
+ raddr->sin_addr.s_addr = ifa->ifa_address;
+ found = 1;
+ break;
+ }
+ endfor_ifa(ind);
+ in_dev_put(ind);
+ return found ? 0 : -EADDRNOTAVAIL;
+}
+
+static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
+ unsigned char banned_flags)
+{
+ struct inet6_dev *idev;
+ int err = -EADDRNOTAVAIL;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev != NULL) {
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ memcpy(addr, &ifp->addr, 16);
+ err = 0;
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+ return err;
+}
+
+static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
+{
+ struct in6_addr uninitialized_var(addr);
+ struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+ struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+
+ if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
+ memcpy(la6->sin6_addr.s6_addr, &addr, 16);
+ memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
+ return 0;
+ }
+ return -EADDRNOTAVAIL;
+}
+
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
struct c4iw_ep *ep;
- struct rtable *rt;
int err = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
+ &cm_id->remote_addr;
+ __u8 *ra;
+ int iptype;
if ((conn_param->ord > c4iw_max_read_depth) ||
(conn_param->ird > c4iw_max_read_depth)) {
@@ -2545,7 +2755,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.dev = dev;
ep->com.cm_id = cm_id;
ep->com.qp = get_qhp(dev, conn_param->qpn);
- BUG_ON(!ep->com.qp);
+ if (!ep->com.qp) {
+ PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+ err = -EINVAL;
+ goto fail2;
+ }
ref_qp(ep);
PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
ep->com.qp, cm_id);
@@ -2561,27 +2775,56 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
insert_handle(dev, &dev->atid_idr, ep, ep->atid);
- PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__,
- ntohl(cm_id->local_addr.sin_addr.s_addr),
- ntohs(cm_id->local_addr.sin_port),
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port));
+ if (cm_id->remote_addr.ss_family == AF_INET) {
+ iptype = 4;
+ ra = (__u8 *)&raddr->sin_addr;
- /* find a route */
- rt = find_route(dev,
- cm_id->local_addr.sin_addr.s_addr,
- cm_id->remote_addr.sin_addr.s_addr,
- cm_id->local_addr.sin_port,
- cm_id->remote_addr.sin_port, 0);
- if (!rt) {
+ /*
+ * Handle loopback requests to INADDR_ANY.
+ */
+ if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
+ err = pick_local_ipaddrs(dev, cm_id);
+ if (err)
+ goto fail2;
+ }
+
+ /* find a route */
+ PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+ __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
+ ra, ntohs(raddr->sin_port));
+ ep->dst = find_route(dev, laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr, laddr->sin_port,
+ raddr->sin_port, 0);
+ } else {
+ iptype = 6;
+ ra = (__u8 *)&raddr6->sin6_addr;
+
+ /*
+ * Handle loopback requests to the IPv6 wildcard address (::).
+ */
+ if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
+ err = pick_local_ip6addrs(dev, cm_id);
+ if (err)
+ goto fail2;
+ }
+
+ /* find a route */
+ PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+ __func__, laddr6->sin6_addr.s6_addr,
+ ntohs(laddr6->sin6_port),
+ raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+ ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port, raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
+ }
+ if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
- ep->dst = &rt->dst;
- err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr,
- ep->dst, ep->com.dev, true);
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
goto fail4;
@@ -2593,8 +2836,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
state_set(&ep->com, CONNECTING);
ep->tos = 0;
- ep->com.local_addr = cm_id->local_addr;
- ep->com.remote_addr = cm_id->remote_addr;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.remote_addr));
/* send connect request to rnic */
err = send_connect(ep);
@@ -2614,6 +2859,60 @@ out:
return err;
}
+static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
+{
+ int err;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+
+ c4iw_init_wr_wait(&ep->com.wr_wait);
+ err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
+ ep->stid, &sin6->sin6_addr,
+ sin6->sin6_port,
+ ep->com.dev->rdev.lldi.rxq_ids[0]);
+ if (!err)
+ err = c4iw_wait_for_reply(&ep->com.dev->rdev,
+ &ep->com.wr_wait,
+ 0, 0, __func__);
+ if (err)
+ pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
+ err, ep->stid,
+ sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
+ return err;
+}
+
+static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
+{
+ int err;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr;
+
+ if (dev->rdev.lldi.enable_fw_ofld_conn) {
+ do {
+ err = cxgb4_create_server_filter(
+ ep->com.dev->rdev.lldi.ports[0], ep->stid,
+ sin->sin_addr.s_addr, sin->sin_port, 0,
+ ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
+ if (err == -EBUSY) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(usecs_to_jiffies(100));
+ }
+ } while (err == -EBUSY);
+ } else {
+ c4iw_init_wr_wait(&ep->com.wr_wait);
+ err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
+ ep->stid, sin->sin_addr.s_addr, sin->sin_port,
+ 0, ep->com.dev->rdev.lldi.rxq_ids[0]);
+ if (!err)
+ err = c4iw_wait_for_reply(&ep->com.dev->rdev,
+ &ep->com.wr_wait,
+ 0, 0, __func__);
+ }
+ if (err)
+ pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
+ , err, ep->stid,
+ &sin->sin_addr, ntohs(sin->sin_port));
+ return err;
+}
+
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
int err = 0;
@@ -2633,15 +2932,18 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep->com.cm_id = cm_id;
ep->com.dev = dev;
ep->backlog = backlog;
- ep->com.local_addr = cm_id->local_addr;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
/*
* Allocate a server TID.
*/
if (dev->rdev.lldi.enable_fw_ofld_conn)
- ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep);
+ ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
+ cm_id->local_addr.ss_family, ep);
else
- ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
+ ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
+ cm_id->local_addr.ss_family, ep);
if (ep->stid == -1) {
printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
@@ -2650,43 +2952,16 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
}
insert_handle(dev, &dev->stid_idr, ep, ep->stid);
state_set(&ep->com, LISTEN);
- if (dev->rdev.lldi.enable_fw_ofld_conn) {
- do {
- err = cxgb4_create_server_filter(
- ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.local_addr.sin_addr.s_addr,
- ep->com.local_addr.sin_port,
- 0,
- ep->com.dev->rdev.lldi.rxq_ids[0],
- 0,
- 0);
- if (err == -EBUSY) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(100));
- }
- } while (err == -EBUSY);
- } else {
- c4iw_init_wr_wait(&ep->com.wr_wait);
- err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
- ep->stid, ep->com.local_addr.sin_addr.s_addr,
- ep->com.local_addr.sin_port,
- 0,
- ep->com.dev->rdev.lldi.rxq_ids[0]);
- if (!err)
- err = c4iw_wait_for_reply(&ep->com.dev->rdev,
- &ep->com.wr_wait,
- 0, 0, __func__);
- }
+ if (ep->com.local_addr.ss_family == AF_INET)
+ err = create_server4(dev, ep);
+ else
+ err = create_server6(dev, ep);
if (!err) {
cm_id->provider_data = ep;
goto out;
}
- pr_err("%s cxgb4_create_server/filter failed err %d " \
- "stid %d laddr %08x lport %d\n", \
- __func__, err, ep->stid,
- ntohl(ep->com.local_addr.sin_addr.s_addr),
- ntohs(ep->com.local_addr.sin_port));
- cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
+ cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+ ep->com.local_addr.ss_family);
fail2:
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
@@ -2704,20 +2979,24 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
might_sleep();
state_set(&ep->com, DEAD);
- if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) {
+ if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
+ ep->com.local_addr.ss_family == AF_INET) {
err = cxgb4_remove_server_filter(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
ep->com.dev->rdev.lldi.rxq_ids[0], 0);
} else {
c4iw_init_wr_wait(&ep->com.wr_wait);
- err = listen_stop(ep);
+ err = cxgb4_remove_server(
+ ep->com.dev->rdev.lldi.ports[0], ep->stid,
+ ep->com.dev->rdev.lldi.rxq_ids[0], 0);
if (err)
goto done;
err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
0, 0, __func__);
}
remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
- cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
+ cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+ ep->com.local_addr.ss_family);
done:
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
@@ -3021,7 +3300,6 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_pass_accept_req *req = (void *)(rss + 1);
struct l2t_entry *e;
struct dst_entry *dst;
- struct rtable *rt;
struct c4iw_ep *lep;
u16 window;
struct port_info *pi;
@@ -3079,14 +3357,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
ntohs(tcph->source), iph->tos);
- rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
- iph->tos);
- if (!rt) {
+ dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
+ iph->tos);
+ if (!dst) {
pr_err("%s - failed to find dst entry!\n",
__func__);
goto reject;
}
- dst = &rt->dst;
neigh = dst_neigh_lookup_skb(dst, skb);
if (!neigh) {
@@ -3103,10 +3380,11 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
tx_chan = cxgb4_port_chan(pdev);
dev_put(pdev);
} else {
+ pdev = get_real_dev(neigh->dev);
e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
- neigh->dev, 0);
- pi = (struct port_info *)netdev_priv(neigh->dev);
- tx_chan = cxgb4_port_chan(neigh->dev);
+ pdev, 0);
+ pi = (struct port_info *)netdev_priv(pdev);
+ tx_chan = cxgb4_port_chan(pdev);
}
if (!e) {
pr_err("%s - failed to allocate l2t entry!\n",
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 0f1607c8325a..88de3aa9c5b0 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -225,43 +225,186 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
t4_swcq_produce(cq);
}
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
+static void advance_oldest_read(struct t4_wq *wq);
+
+int c4iw_flush_sq(struct c4iw_qp *qhp)
{
int flushed = 0;
- struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count];
- int in_use = wq->sq.in_use - count;
-
- BUG_ON(in_use < 0);
- while (in_use--) {
- swsqe->signaled = 0;
- insert_sq_cqe(wq, cq, swsqe);
- swsqe++;
- if (swsqe == (wq->sq.sw_sq + wq->sq.size))
- swsqe = wq->sq.sw_sq;
- flushed++;
+ struct t4_wq *wq = &qhp->wq;
+ struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
+ struct t4_cq *cq = &chp->cq;
+ int idx;
+ struct t4_swsqe *swsqe;
+ int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
+ qhp->attr.state != C4IW_QP_STATE_IDLE);
+
+ if (wq->sq.flush_cidx == -1)
+ wq->sq.flush_cidx = wq->sq.cidx;
+ idx = wq->sq.flush_cidx;
+ BUG_ON(idx >= wq->sq.size);
+ while (idx != wq->sq.pidx) {
+ if (error) {
+ swsqe = &wq->sq.sw_sq[idx];
+ BUG_ON(swsqe->flushed);
+ swsqe->flushed = 1;
+ insert_sq_cqe(wq, cq, swsqe);
+ if (wq->sq.oldest_read == swsqe) {
+ BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+ advance_oldest_read(wq);
+ }
+ flushed++;
+ } else {
+ t4_sq_consume(wq);
+ }
+ if (++idx == wq->sq.size)
+ idx = 0;
}
+ wq->sq.flush_cidx += flushed;
+ if (wq->sq.flush_cidx >= wq->sq.size)
+ wq->sq.flush_cidx -= wq->sq.size;
return flushed;
}
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+ struct t4_swsqe *swsqe;
+ int cidx;
+
+ if (wq->sq.flush_cidx == -1)
+ wq->sq.flush_cidx = wq->sq.cidx;
+ cidx = wq->sq.flush_cidx;
+ BUG_ON(cidx > wq->sq.size);
+
+ while (cidx != wq->sq.pidx) {
+ swsqe = &wq->sq.sw_sq[cidx];
+ if (!swsqe->signaled) {
+ if (++cidx == wq->sq.size)
+ cidx = 0;
+ } else if (swsqe->complete) {
+
+ BUG_ON(swsqe->flushed);
+
+ /*
+ * Insert this completed cqe into the swcq.
+ */
+ PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
+ __func__, cidx, cq->sw_pidx);
+ swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+ cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+ t4_swcq_produce(cq);
+ swsqe->flushed = 1;
+ if (++cidx == wq->sq.size)
+ cidx = 0;
+ wq->sq.flush_cidx = cidx;
+ } else
+ break;
+ }
+}
+
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+ struct t4_cqe *read_cqe)
+{
+ read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
+ read_cqe->len = htonl(wq->sq.oldest_read->read_len);
+ read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
+ V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+ V_CQE_OPCODE(FW_RI_READ_REQ) |
+ V_CQE_TYPE(1));
+ read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
+static void advance_oldest_read(struct t4_wq *wq)
+{
+
+ u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+ if (rptr == wq->sq.size)
+ rptr = 0;
+ while (rptr != wq->sq.pidx) {
+ wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
+
+ if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+ return;
+ if (++rptr == wq->sq.size)
+ rptr = 0;
+ }
+ wq->sq.oldest_read = NULL;
+}
+
/*
* Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order completions and with completions
+ * that complete prior unsignaled WRs.
*/
-void c4iw_flush_hw_cq(struct t4_cq *cq)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
- struct t4_cqe *cqe = NULL, *swcqe;
+ struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+ struct c4iw_qp *qhp;
+ struct t4_swsqe *swsqe;
int ret;
- PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
- ret = t4_next_hw_cqe(cq, &cqe);
+ PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+ ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+ /*
+ * This logic is similar to poll_cq(), but not quite the same
+ * unfortunately. Need to move pertinent HW CQEs to the SW CQ but
+ * also do any translation magic that poll_cq() normally does.
+ */
while (!ret) {
- PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n",
- __func__, cq->cidx, cq->sw_pidx);
- swcqe = &cq->sw_queue[cq->sw_pidx];
- *swcqe = *cqe;
- swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
- t4_swcq_produce(cq);
- t4_hwcq_consume(cq);
- ret = t4_next_hw_cqe(cq, &cqe);
+ qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+ /*
+ * drop CQEs with no associated QP
+ */
+ if (qhp == NULL)
+ goto next_cqe;
+
+ if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+ goto next_cqe;
+
+ if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+ /*
+ * drop peer2peer RTR reads.
+ */
+ if (CQE_WRID_STAG(hw_cqe) == 1)
+ goto next_cqe;
+
+ /*
+ * Eat completions for unsignaled read WRs.
+ */
+ if (!qhp->wq.sq.oldest_read->signaled) {
+ advance_oldest_read(&qhp->wq);
+ goto next_cqe;
+ }
+
+ /*
+ * Don't write to the HWCQ, create a new read req CQE
+ * in local memory and move it into the swcq.
+ */
+ create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(&qhp->wq);
+ }
+
+ /* If it's an SQ completion, then do the magic to move all the
+ * unsignaled and now in-order completions into the swcq.
+ */
+ if (SQ_TYPE(hw_cqe)) {
+ swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+ swsqe->cqe = *hw_cqe;
+ swsqe->complete = 1;
+ flush_completed_wrs(&qhp->wq, &chp->cq);
+ } else {
+ swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+ *swcqe = *hw_cqe;
+ swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+ t4_swcq_produce(&chp->cq);
+ }
+next_cqe:
+ t4_hwcq_consume(&chp->cq);
+ ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
}
}
@@ -281,25 +424,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
return 1;
}
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
-{
- struct t4_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- ptr = cq->sw_cidx;
- while (ptr != cq->sw_pidx) {
- cqe = &cq->sw_queue[ptr];
- if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) &&
- wq->sq.oldest_read)) &&
- (CQE_QPID(cqe) == wq->sq.qid))
- (*count)++;
- if (++ptr == cq->size)
- ptr = 0;
- }
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
-}
-
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
struct t4_cqe *cqe;
@@ -319,70 +443,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
PDBG("%s cq %p count %d\n", __func__, cq, *count);
}
-static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
-{
- struct t4_swsqe *swsqe;
- u16 ptr = wq->sq.cidx;
- int count = wq->sq.in_use;
- int unsignaled = 0;
-
- swsqe = &wq->sq.sw_sq[ptr];
- while (count--)
- if (!swsqe->signaled) {
- if (++ptr == wq->sq.size)
- ptr = 0;
- swsqe = &wq->sq.sw_sq[ptr];
- unsignaled++;
- } else if (swsqe->complete) {
-
- /*
- * Insert this completed cqe into the swcq.
- */
- PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
- __func__, ptr, cq->sw_pidx);
- swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
- cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
- t4_swcq_produce(cq);
- swsqe->signaled = 0;
- wq->sq.in_use -= unsignaled;
- break;
- } else
- break;
-}
-
-static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
- struct t4_cqe *read_cqe)
-{
- read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
- read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len);
- read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
- V_CQE_SWCQE(SW_CQE(hw_cqe)) |
- V_CQE_OPCODE(FW_RI_READ_REQ) |
- V_CQE_TYPE(1));
- read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t4_wq *wq)
-{
-
- u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
-
- if (rptr == wq->sq.size)
- rptr = 0;
- while (rptr != wq->sq.pidx) {
- wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
-
- if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
- return;
- if (++rptr == wq->sq.size)
- rptr = 0;
- }
- wq->sq.oldest_read = NULL;
-}
-
/*
* poll_cq
*
@@ -427,6 +487,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
}
/*
+ * skip HW CQEs if the wq is flushed.
+ */
+ if (wq->flushed && !SW_CQE(hw_cqe)) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * skip TERMINATE cqes...
+ */
+ if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
* Gotta tweak READ completions:
* 1) the cqe doesn't contain the sq_wptr from the wr.
* 2) opcode not reflected from the wr.
@@ -440,7 +516,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
* was generated by the kernel driver as part of peer-2-peer
* connection setup. So ignore the completion.
*/
- if (!wq->sq.oldest_read) {
+ if (CQE_WRID_STAG(hw_cqe) == 1) {
if (CQE_STATUS(hw_cqe))
t4_set_wq_in_error(wq);
ret = -EAGAIN;
@@ -448,6 +524,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
}
/*
+ * Eat completions for unsignaled read WRs.
+ */
+ if (!wq->sq.oldest_read->signaled) {
+ advance_oldest_read(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
* Don't write to the HWCQ, so create a new read req CQE
* in local memory.
*/
@@ -457,14 +542,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
}
if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
- *cqe_flushed = t4_wq_in_error(wq);
+ *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
t4_set_wq_in_error(wq);
- goto proc_cqe;
- }
-
- if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
- ret = -EAGAIN;
- goto skip_cqe;
}
/*
@@ -523,7 +602,24 @@ proc_cqe:
* completion.
*/
if (SQ_TYPE(hw_cqe)) {
- wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
+ int idx = CQE_WRID_SQ_IDX(hw_cqe);
+ BUG_ON(idx > wq->sq.size);
+
+ /*
+ * Account for any unsignaled completions completed by
+ * this signaled completion. In this case, cidx points
+ * to the first unsignaled one, and idx points to the
+ * signaled one. So adjust in_use based on this delta.
+ * If this is not completing any unsignaled WRs, then the
+ * delta will be 0. Handle wrapping also!
+ */
+ if (idx < wq->sq.cidx)
+ wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
+ else
+ wq->sq.in_use -= idx - wq->sq.cidx;
+ BUG_ON(wq->sq.in_use < 0 || wq->sq.in_use > wq->sq.size);
+
+ wq->sq.cidx = (uint16_t)idx;
PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
t4_sq_consume(wq);
@@ -532,6 +628,7 @@ proc_cqe:
*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
BUG_ON(t4_rq_empty(wq));
t4_rq_consume(wq);
+ goto skip_cqe;
}
flush_wq:
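The proc_cqe() hunk above adjusts sq.in_use by the distance from cidx (the first unsignaled WR) to the index carried in the signaled completion, wrapping around the ring when needed. A small worked example of that arithmetic, standalone and illustrative only:

#include <assert.h>

/* Number of send WRs retired by a signaled completion at 'idx' when the
 * consumer index is 'cidx', on a ring of 'size' slots (wrap-aware). */
static unsigned int wrs_retired(unsigned int size, unsigned int cidx,
                                unsigned int idx)
{
	return idx < cidx ? size + idx - cidx : idx - cidx;
}

int main(void)
{
	assert(wrs_retired(16, 14, 2) == 4);  /* wrapped: slots 14, 15, 0, 1 */
	assert(wrs_retired(16, 3, 3) == 0);   /* completes no earlier unsignaled WRs */
	return 0;
}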
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index ae656016e1ae..33d2cc6ab562 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -103,18 +103,43 @@ static int dump_qp(int id, void *p, void *data)
if (space == 0)
return 1;
- if (qp->ep)
- cc = snprintf(qpd->buf + qpd->pos, space,
- "qp sq id %u rq id %u state %u onchip %u "
- "ep tid %u state %u %pI4:%u->%pI4:%u\n",
- qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
- qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
- &qp->ep->com.local_addr.sin_addr.s_addr,
- ntohs(qp->ep->com.local_addr.sin_port),
- &qp->ep->com.remote_addr.sin_addr.s_addr,
- ntohs(qp->ep->com.remote_addr.sin_port));
- else
+ if (qp->ep) {
+ if (qp->ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin = (struct sockaddr_in *)
+ &qp->ep->com.local_addr;
+ struct sockaddr_in *rsin = (struct sockaddr_in *)
+ &qp->ep->com.remote_addr;
+
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "rc qp sq id %u rq id %u state %u "
+ "onchip %u ep tid %u state %u "
+ "%pI4:%u->%pI4:%u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP,
+ qp->ep->hwtid, (int)qp->ep->com.state,
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port));
+ } else {
+ struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+ &qp->ep->com.local_addr;
+ struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+ &qp->ep->com.remote_addr;
+
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "rc qp sq id %u rq id %u state %u "
+ "onchip %u ep tid %u state %u "
+ "%pI6:%u->%pI6:%u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP,
+ qp->ep->hwtid, (int)qp->ep->com.state,
+ &lsin6->sin6_addr,
+ ntohs(lsin6->sin6_port),
+ &rsin6->sin6_addr,
+ ntohs(rsin6->sin6_port));
+ }
+ } else
cc = snprintf(qpd->buf + qpd->pos, space,
"qp sq id %u rq id %u state %u onchip %u\n",
qp->wq.sq.qid, qp->wq.rq.qid,
@@ -351,15 +376,37 @@ static int dump_ep(int id, void *p, void *data)
if (space == 0)
return 1;
- cc = snprintf(epd->buf + epd->pos, space,
- "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx "
- "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n",
- ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state,
- ep->com.flags, ep->com.history, ep->hwtid, ep->atid,
- &ep->com.local_addr.sin_addr.s_addr,
- ntohs(ep->com.local_addr.sin_port),
- &ep->com.remote_addr.sin_addr.s_addr,
- ntohs(ep->com.remote_addr.sin_port));
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin = (struct sockaddr_in *)
+ &ep->com.local_addr;
+ struct sockaddr_in *rsin = (struct sockaddr_in *)
+ &ep->com.remote_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p qp %p state %d flags 0x%lx "
+ "history 0x%lx hwtid %d atid %d "
+ "%pI4:%d <-> %pI4:%d\n",
+ ep, ep->com.cm_id, ep->com.qp,
+ (int)ep->com.state, ep->com.flags,
+ ep->com.history, ep->hwtid, ep->atid,
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port));
+ } else {
+ struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+ &ep->com.local_addr;
+ struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+ &ep->com.remote_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p qp %p state %d flags 0x%lx "
+ "history 0x%lx hwtid %d atid %d "
+ "%pI6:%d <-> %pI6:%d\n",
+ ep, ep->com.cm_id, ep->com.qp,
+ (int)ep->com.state, ep->com.flags,
+ ep->com.history, ep->hwtid, ep->atid,
+ &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+ &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
+ }
if (cc < space)
epd->pos += cc;
return 0;
@@ -376,12 +423,27 @@ static int dump_listen_ep(int id, void *p, void *data)
if (space == 0)
return 1;
- cc = snprintf(epd->buf + epd->pos, space,
- "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d "
- "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state,
- ep->com.flags, ep->stid, ep->backlog,
- &ep->com.local_addr.sin_addr.s_addr,
- ntohs(ep->com.local_addr.sin_port));
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin = (struct sockaddr_in *)
+ &ep->com.local_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p state %d flags 0x%lx stid %d "
+ "backlog %d %pI4:%d\n",
+ ep, ep->com.cm_id, (int)ep->com.state,
+ ep->com.flags, ep->stid, ep->backlog,
+ &lsin->sin_addr, ntohs(lsin->sin_port));
+ } else {
+ struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+ &ep->com.local_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p state %d flags 0x%lx stid %d "
+ "backlog %d %pI6:%d\n",
+ ep, ep->com.cm_id, (int)ep->com.state,
+ ep->com.flags, ep->stid, ep->backlog,
+ &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
+ }
if (cc < space)
epd->pos += cc;
return 0;
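The debugfs dumpers above now choose between the kernel's %pI4 and %pI6 printk extensions based on ss_family. A kernel-context sketch of the same selection factored into one helper; format_ss is hypothetical and assumes the usual <linux/socket.h>, <linux/in.h> and <linux/in6.h> definitions are available.

/* Sketch only: format either address family of a sockaddr_storage the way
 * the dump_qp()/dump_ep() branches above do. */
static int format_ss(char *buf, size_t len, struct sockaddr_storage *ss)
{
	if (ss->ss_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		return scnprintf(buf, len, "%pI4:%u",
				 &sin->sin_addr, ntohs(sin->sin_port));
	} else {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		return scnprintf(buf, len, "%pI6:%u",
				 &sin6->sin6_addr, ntohs(sin6->sin6_port));
	}
}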
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 1a840b2211dd..d61d0a18f784 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -44,16 +44,6 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
struct c4iw_qp_attributes attrs;
unsigned long flag;
- if ((qhp->attr.state == C4IW_QP_STATE_ERROR) ||
- (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) {
- pr_err("%s AE after RTS - qpid 0x%x opcode %d status 0x%x "\
- "type %d wrid.hi 0x%x wrid.lo 0x%x\n",
- __func__, CQE_QPID(err_cqe), CQE_OPCODE(err_cqe),
- CQE_STATUS(err_cqe), CQE_TYPE(err_cqe),
- CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
- return;
- }
-
printk(KERN_ERR MOD "AE qpid 0x%x opcode %d status 0x%x "
"type %d wrid.hi 0x%x wrid.lo 0x%x\n",
CQE_QPID(err_cqe), CQE_OPCODE(err_cqe),
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 485183ad34cd..23eaeabab93b 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -752,8 +752,8 @@ struct c4iw_ep_common {
enum c4iw_ep_state state;
struct kref kref;
struct mutex mutex;
- struct sockaddr_in local_addr;
- struct sockaddr_in remote_addr;
+ struct sockaddr_storage local_addr;
+ struct sockaddr_storage remote_addr;
struct c4iw_wr_wait wr_wait;
unsigned long flags;
unsigned long history;
@@ -917,12 +917,11 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb);
-void c4iw_flush_hw_cq(struct t4_cq *cq);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp);
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_flush_sq(struct c4iw_qp *qhp);
int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index a4975e1654a6..582936708e6e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -737,6 +737,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
swsqe->idx = qhp->wq.sq.pidx;
swsqe->complete = 0;
swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+ swsqe->flushed = 0;
swsqe->wr_id = wr->wr_id;
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1006,7 +1007,15 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&rchp->lock, flag);
spin_lock(&qhp->lock);
- c4iw_flush_hw_cq(&rchp->cq);
+
+ if (qhp->wq.flushed) {
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&rchp->lock, flag);
+ return;
+ }
+ qhp->wq.flushed = 1;
+
+ c4iw_flush_hw_cq(rchp);
c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
@@ -1020,9 +1029,9 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, flag);
spin_lock(&qhp->lock);
- c4iw_flush_hw_cq(&schp->cq);
- c4iw_count_scqes(&schp->cq, &qhp->wq, &count);
- flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
+ if (schp != rchp)
+ c4iw_flush_hw_cq(schp);
+ flushed = c4iw_flush_sq(qhp);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, flag);
if (flushed) {
@@ -1037,11 +1046,11 @@ static void flush_qp(struct c4iw_qp *qhp)
struct c4iw_cq *rchp, *schp;
unsigned long flag;
- rchp = get_chp(qhp->rhp, qhp->attr.rcq);
- schp = get_chp(qhp->rhp, qhp->attr.scq);
+ rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+ schp = to_c4iw_cq(qhp->ibqp.send_cq);
+ t4_set_wq_in_error(&qhp->wq);
if (qhp->ibqp.uobject) {
- t4_set_wq_in_error(&qhp->wq);
t4_set_cq_in_error(&rchp->cq);
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1330,8 +1339,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
disconnect = 1;
c4iw_get_ep(&qhp->ep->com);
}
- if (qhp->ibqp.uobject)
- t4_set_wq_in_error(&qhp->wq);
+ t4_set_wq_in_error(&qhp->wq);
ret = rdma_fini(rhp, qhp, ep);
if (ret)
goto err;
@@ -1340,18 +1348,21 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
set_state(qhp, C4IW_QP_STATE_TERMINATE);
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
- if (qhp->ibqp.uobject)
- t4_set_wq_in_error(&qhp->wq);
+ t4_set_wq_in_error(&qhp->wq);
ep = qhp->ep;
+ disconnect = 1;
if (!internal)
terminate = 1;
- disconnect = 1;
+ else {
+ ret = rdma_fini(rhp, qhp, ep);
+ if (ret)
+ goto err;
+ }
c4iw_get_ep(&qhp->ep->com);
break;
case C4IW_QP_STATE_ERROR:
set_state(qhp, C4IW_QP_STATE_ERROR);
- if (qhp->ibqp.uobject)
- t4_set_wq_in_error(&qhp->wq);
+ t4_set_wq_in_error(&qhp->wq);
if (!internal) {
abort = 1;
disconnect = 1;
@@ -1552,12 +1563,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
-
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
return ERR_PTR(-ENOMEM);
qhp->wq.sq.size = sqsize;
qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue;
+ qhp->wq.sq.flush_cidx = -1;
qhp->wq.rq.size = rqsize;
qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index ebcb03bd1b72..e73ace739183 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -36,9 +36,9 @@
#include "t4_msg.h"
#include "t4fw_ri_api.h"
-#define T4_MAX_NUM_QP (1<<16)
-#define T4_MAX_NUM_CQ (1<<15)
-#define T4_MAX_NUM_PD (1<<15)
+#define T4_MAX_NUM_QP 65536
+#define T4_MAX_NUM_CQ 65536
+#define T4_MAX_NUM_PD 65536
#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
#define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES)
#define T4_MAX_IQ_SIZE (65520 - 1)
@@ -47,7 +47,7 @@
#define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1)
#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
#define T4_MAX_NUM_STAG (1<<15)
-#define T4_MAX_MR_SIZE (~0ULL - 1)
+#define T4_MAX_MR_SIZE (~0ULL)
#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
#define T4_STAG_UNSET 0xffffffff
#define T4_FW_MAJ 0
@@ -269,6 +269,7 @@ struct t4_swsqe {
int complete;
int signaled;
u16 idx;
+ int flushed;
};
static inline pgprot_t t4_pgprot_wc(pgprot_t prot)
@@ -300,6 +301,7 @@ struct t4_sq {
u16 pidx;
u16 wq_pidx;
u16 flags;
+ short flush_cidx;
};
struct t4_swrqe {
@@ -330,6 +332,7 @@ struct t4_wq {
void __iomem *db;
void __iomem *gts;
struct c4iw_rdev *rdev;
+ int flushed;
};
static inline int t4_rqes_posted(struct t4_wq *wq)
@@ -412,6 +415,9 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
static inline void t4_sq_consume(struct t4_wq *wq)
{
+ BUG_ON(wq->sq.in_use < 1);
+ if (wq->sq.cidx == wq->sq.flush_cidx)
+ wq->sq.flush_cidx = -1;
wq->sq.in_use--;
if (++wq->sq.cidx == wq->sq.size)
wq->sq.cidx = 0;
@@ -505,12 +511,18 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se)
static inline void t4_swcq_produce(struct t4_cq *cq)
{
cq->sw_in_use++;
+ if (cq->sw_in_use == cq->size) {
+ PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ cq->error = 1;
+ BUG_ON(1);
+ }
if (++cq->sw_pidx == cq->size)
cq->sw_pidx = 0;
}
static inline void t4_swcq_consume(struct t4_cq *cq)
{
+ BUG_ON(cq->sw_in_use < 1);
cq->sw_in_use--;
if (++cq->sw_cidx == cq->size)
cq->sw_cidx = 0;
@@ -519,7 +531,7 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
static inline void t4_hwcq_consume(struct t4_cq *cq)
{
cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
- if (++cq->cidx_inc == (cq->size >> 4)) {
+ if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_MASK) {
u32 val;
val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
@@ -552,6 +564,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
ret = -EOVERFLOW;
cq->error = 1;
printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+ BUG_ON(1);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
*cqe = &cq->queue[cq->cidx];
ret = 0;
@@ -562,6 +575,12 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
{
+ if (cq->sw_in_use == cq->size) {
+ PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ cq->error = 1;
+ BUG_ON(1);
+ return NULL;
+ }
if (cq->sw_in_use)
return &cq->sw_queue[cq->sw_cidx];
return NULL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index a188d3178559..d6c5a73becf4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -54,6 +54,8 @@
#define DRV_VERSION "1.0"
#define DRV_RELDATE "April 4, 2008"
+#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
@@ -88,6 +90,25 @@ static void init_query_mad(struct ib_smp *mad)
static union ib_gid zgid;
+static int check_flow_steering_support(struct mlx4_dev *dev)
+{
+ int ib_num_ports = 0;
+ int i;
+
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ if (ib_num_ports || mlx4_is_mfunc(dev)) {
+ pr_warn("Device managed flow steering is unavailable "
+ "for IB ports or in multifunction env.\n");
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
@@ -144,6 +165,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
+ if (check_flow_steering_support(dev->dev))
+ props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
@@ -798,6 +821,209 @@ struct mlx4_ib_steering {
union ib_gid gid;
};
+static int parse_flow_attr(struct mlx4_dev *dev,
+ union ib_flow_spec *ib_spec,
+ struct _rule_hw *mlx4_spec)
+{
+ enum mlx4_net_trans_rule_id type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ type = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
+ ETH_ALEN);
+ memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
+ ETH_ALEN);
+ mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
+ mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
+ break;
+
+ case IB_FLOW_SPEC_IPV4:
+ type = MLX4_NET_TRANS_RULE_ID_IPV4;
+ mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
+ mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
+ mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
+ mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
+ break;
+
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ type = ib_spec->type == IB_FLOW_SPEC_TCP ?
+ MLX4_NET_TRANS_RULE_ID_TCP :
+ MLX4_NET_TRANS_RULE_ID_UDP;
+ mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
+ mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
+ mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
+ mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
+ mlx4_hw_rule_sz(dev, type) < 0)
+ return -EINVAL;
+ mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
+ mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
+ return mlx4_hw_rule_sz(dev, type);
+}
+
+static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
+ int domain,
+ enum mlx4_net_trans_promisc_mode flow_type,
+ u64 *reg_id)
+{
+ int ret, i;
+ int size = 0;
+ void *ib_flow;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->device);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
+ (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
+
+ static const u16 __mlx4_domain[] = {
+ [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
+ [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
+ [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
+ [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
+ };
+
+ if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
+ pr_err("Invalid priority value %d\n", flow_attr->priority);
+ return -EINVAL;
+ }
+
+ if (domain >= IB_FLOW_DOMAIN_NUM) {
+ pr_err("Invalid domain value %d\n", domain);
+ return -EINVAL;
+ }
+
+ if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
+ return -EINVAL;
+
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ memset(mailbox->buf, 0, rule_size);
+ ctrl = mailbox->buf;
+
+ ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
+ flow_attr->priority);
+ ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
+ ctrl->port = flow_attr->port;
+ ctrl->qpn = cpu_to_be32(qp->qp_num);
+
+ ib_flow = flow_attr + 1;
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ for (i = 0; i < flow_attr->num_of_specs; i++) {
+ ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ ib_flow += ((union ib_flow_spec *) ib_flow)->size;
+ size += ret;
+ }
+
+ ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret == -ENOMEM)
+ pr_err("mcg table is full. Failed to register network rule.\n");
+ else if (ret == -ENXIO)
+ pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
+ else if (ret)
+ pr_err("Invalid argument. Failed to register network rule.\n");
+
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return ret;
+}
+
+static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+ err = mlx4_cmd(dev, reg_id, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ pr_err("Fail to detach network rule. registration id = 0x%llx\n",
+ reg_id);
+ return err;
+}
+
+static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ int err = 0, i = 0;
+ struct mlx4_ib_flow *mflow;
+ enum mlx4_net_trans_promisc_mode type[2];
+
+ memset(type, 0, sizeof(type));
+
+ mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
+ if (!mflow) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ type[0] = MLX4_FS_REGULAR;
+ break;
+
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ type[0] = MLX4_FS_ALL_DEFAULT;
+ break;
+
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ type[0] = MLX4_FS_MC_DEFAULT;
+ break;
+
+ case IB_FLOW_ATTR_SNIFFER:
+ type[0] = MLX4_FS_UC_SNIFFER;
+ type[1] = MLX4_FS_MC_SNIFFER;
+ break;
+
+ default:
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ while (i < ARRAY_SIZE(type) && type[i]) {
+ err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
+ &mflow->reg_id[i]);
+ if (err)
+ goto err_free;
+ i++;
+ }
+
+ return &mflow->ibflow;
+
+err_free:
+ kfree(mflow);
+ return ERR_PTR(err);
+}
+
+static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err, ret = 0;
+ int i = 0;
+ struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
+ struct mlx4_ib_flow *mflow = to_mflow(flow_id);
+
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ if (err)
+ ret = err;
+ i++;
+ }
+
+ kfree(mflow);
+ return ret;
+}
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
@@ -1461,6 +1687,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (check_flow_steering_support(dev)) {
+ ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
+ ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
+
+ ibdev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
+ }
+
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
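mlx4_ib_create_flow()/mlx4_ib_destroy_flow() above back the new flow-steering verbs. Below is a hedged sketch of how a kernel consumer might attach one IPv4 rule through ib_create_flow(); the layout (an ib_flow_attr header followed by num_of_specs spec blocks) matches what __mlx4_ib_create_flow() walks, but the exact ib_flow_* field names come from the ib_verbs.h part of this series and should be checked against the tree being built.

/* Illustrative only: one ib_flow_attr with a single IPv4 spec appended
 * directly after it, steering traffic for dst_ip to the given QP. */
struct one_ipv4_rule {
	struct ib_flow_attr      attr;
	struct ib_flow_spec_ipv4 ipv4;
};

static struct ib_flow *steer_ipv4(struct ib_qp *qp, __be32 dst_ip, u8 port)
{
	struct one_ipv4_rule r = {
		.attr = {
			.type         = IB_FLOW_ATTR_NORMAL,
			.size         = sizeof(r),
			.priority     = 0,
			.num_of_specs = 1,
			.port         = port,
		},
		.ipv4 = {
			.type = IB_FLOW_SPEC_IPV4,
			.size = sizeof(r.ipv4),
			.val  = { .dst_ip = dst_ip },
			.mask = { .dst_ip = cpu_to_be32(0xffffffff) },
		},
	};

	return ib_create_flow(qp, &r.attr, IB_FLOW_DOMAIN_USER);
}

/* The returned ib_flow is released with ib_destroy_flow() once the rule is
 * no longer needed. */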
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f61ec26500c4..036b663dd26e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -132,6 +132,12 @@ struct mlx4_ib_fmr {
struct mlx4_fmr mfmr;
};
+struct mlx4_ib_flow {
+ struct ib_flow ibflow;
+ /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
+ u64 reg_id[2];
+};
+
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
@@ -552,6 +558,12 @@ static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
}
+
+static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
+{
+ return container_of(ibflow, struct mlx4_ib_flow, ibflow);
+}
+
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 24b9f1a0107b..6b29249aa85a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2998,6 +2998,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
u8 *start_ptr = &start_addr;
u8 **start_buff = &start_ptr;
u16 buff_len = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
@@ -3062,8 +3064,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
/* setup our first outgoing iWarp send WQE (the IETF frame response) */
wqe = &nesqp->hwqp.sq_vbase[0];
- if (cm_id->remote_addr.sin_addr.s_addr !=
- cm_id->local_addr.sin_addr.s_addr) {
+ if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) {
u64temp = (unsigned long)nesqp;
nesibdev = nesvnic->nesibdev;
nespd = nesqp->nespd;
@@ -3132,13 +3133,10 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nes_cm_init_tsa_conn(nesqp, cm_node);
- nesqp->nesqp_context->tcpPorts[0] =
- cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
- nesqp->nesqp_context->tcpPorts[1] =
- cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+ nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
+ nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
- nesqp->nesqp_context->ip0 =
- cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+ nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
nesqp->nesqp_context->misc2 |= cpu_to_le32(
(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3162,9 +3160,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
memset(&nes_quad, 0, sizeof(nes_quad));
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
- nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
- nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+ nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
+ nes_quad.TcpPorts[0] = raddr->sin_port;
+ nes_quad.TcpPorts[1] = laddr->sin_port;
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
@@ -3180,10 +3178,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
"0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
"private data length=%u.\n", nesqp->hwqp.qp_id,
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohl(cm_id->local_addr.sin_addr.s_addr),
- ntohs(cm_id->local_addr.sin_port),
+ ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port),
+ ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port),
le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
le32_to_cpu(nesqp->nesqp_context->snd_nxt),
buff_len);
@@ -3263,7 +3259,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct nes_cm_node *cm_node;
struct nes_cm_info cm_info;
int apbvt_set = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ if (cm_id->remote_addr.ss_family != AF_INET)
+ return -ENOSYS;
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
return -EINVAL;
@@ -3277,16 +3277,14 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!nesdev)
return -EINVAL;
- if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
+ if (!laddr->sin_port || !raddr->sin_port)
return -EINVAL;
nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
"0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
- ntohl(nesvnic->local_ipaddr),
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohl(cm_id->local_addr.sin_addr.s_addr),
- ntohs(cm_id->local_addr.sin_port));
+ ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr),
+ ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr),
+ ntohs(laddr->sin_port));
atomic_inc(&cm_connects);
nesqp->active_conn = 1;
@@ -3306,18 +3304,18 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
conn_param->private_data_len);
- if (cm_id->local_addr.sin_addr.s_addr !=
- cm_id->remote_addr.sin_addr.s_addr) {
- nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+ if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
+ nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+ PCI_FUNC(nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_ADD);
apbvt_set = 1;
}
/* set up the connection params for the node */
- cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr);
- cm_info.loc_port = htons(cm_id->local_addr.sin_port);
- cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr);
- cm_info.rem_port = htons(cm_id->remote_addr.sin_port);
+ cm_info.loc_addr = htonl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = htons(laddr->sin_port);
+ cm_info.rem_addr = htonl(raddr->sin_addr.s_addr);
+ cm_info.rem_port = htons(raddr->sin_port);
cm_info.cm_id = cm_id;
cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
@@ -3329,7 +3327,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
&cm_info);
if (!cm_node) {
if (apbvt_set)
- nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+ nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
PCI_FUNC(nesdev->pcidev->devfn),
NES_MANAGE_APBVT_DEL);
@@ -3355,10 +3353,13 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
struct nes_cm_listener *cm_node;
struct nes_cm_info cm_info;
int err;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
- cm_id, ntohs(cm_id->local_addr.sin_port));
+ cm_id, ntohs(laddr->sin_port));
+ if (cm_id->local_addr.ss_family != AF_INET)
+ return -ENOSYS;
nesvnic = to_nesvnic(cm_id->device);
if (!nesvnic)
return -EINVAL;
@@ -3367,11 +3368,11 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
nesvnic, nesvnic->netdev, nesvnic->netdev->name);
nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
- nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr);
+ nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
/* setup listen params in our api call struct */
cm_info.loc_addr = nesvnic->local_ipaddr;
- cm_info.loc_port = cm_id->local_addr.sin_port;
+ cm_info.loc_port = laddr->sin_port;
cm_info.backlog = backlog;
cm_info.cm_id = cm_id;
@@ -3388,8 +3389,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = cm_node;
if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic,
- ntohs(cm_id->local_addr.sin_port),
+ err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
NES_MANAGE_APBVT_ADD);
if (err) {
@@ -3487,6 +3487,9 @@ static void cm_event_connected(struct nes_cm_event *event)
struct nes_v4_quad nes_quad;
u32 crc_value;
int ret;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in *cm_event_laddr;
/* get all our handles */
cm_node = event->cm_node;
@@ -3496,27 +3499,24 @@ static void cm_event_connected(struct nes_cm_event *event)
nesvnic = to_nesvnic(nesqp->ibqp.device);
nesdev = nesvnic->nesdev;
nesadapter = nesdev->nesadapter;
+ laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
if (nesqp->destroyed)
return;
atomic_inc(&cm_connecteds);
nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on"
" local port 0x%04X. jiffies = %lu.\n",
- nesqp->hwqp.qp_id,
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohs(cm_id->local_addr.sin_port),
- jiffies);
+ nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr),
+ ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies);
nes_cm_init_tsa_conn(nesqp, cm_node);
/* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] =
- cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
- nesqp->nesqp_context->tcpPorts[1] =
- cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
- nesqp->nesqp_context->ip0 =
- cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
+ nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
+ nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
nesqp->nesqp_context->misc2 |= cpu_to_le32(
(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3544,9 +3544,9 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
- nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
- nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+ nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
+ nes_quad.TcpPorts[0] = raddr->sin_port;
+ nes_quad.TcpPorts[1] = laddr->sin_port;
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
@@ -3565,8 +3565,8 @@ static void cm_event_connected(struct nes_cm_event *event)
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
cm_event.status = 0;
cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr.sin_family = AF_INET;
- cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
+ cm_event_laddr->sin_family = AF_INET;
+ cm_event_laddr->sin_port = laddr->sin_port;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
@@ -3574,7 +3574,7 @@ static void cm_event_connected(struct nes_cm_event *event)
cm_event.ird = cm_node->ird_size;
cm_event.ord = cm_node->ord_size;
- cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
+ cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr;
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
@@ -3627,9 +3627,16 @@ static void cm_event_connect_error(struct nes_cm_event *event)
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
- "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
- cm_event.remote_addr.sin_addr.s_addr);
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+ {
+ struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+ &cm_event.local_addr;
+ struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+ &cm_event.remote_addr;
+ nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n",
+ cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr);
+ }
+#endif
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
@@ -3709,6 +3716,10 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
struct iw_cm_event cm_event;
int ret;
struct nes_cm_node *cm_node;
+ struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+ &cm_event.local_addr;
+ struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+ &cm_event.remote_addr;
cm_node = event->cm_node;
if (!cm_node)
@@ -3723,13 +3734,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
cm_event.status = 0;
cm_event.provider_data = (void *)cm_node;
- cm_event.local_addr.sin_family = AF_INET;
- cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
- cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+ cm_event_laddr->sin_family = AF_INET;
+ cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
- cm_event.remote_addr.sin_family = AF_INET;
- cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
- cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+ cm_event_raddr->sin_family = AF_INET;
+ cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
+ cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
cm_event.private_data = cm_node->mpa_frame_buf;
cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
cm_event.ird = cm_node->ird_size;
@@ -3749,6 +3760,10 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
struct iw_cm_event cm_event;
struct nes_cm_node *cm_node;
int ret;
+ struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+ &cm_event.local_addr;
+ struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+ &cm_event.remote_addr;
cm_node = event->cm_node;
if (!cm_node)
@@ -3763,21 +3778,21 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
cm_event.status = -ECONNREFUSED;
cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr.sin_family = AF_INET;
- cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
- cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+ cm_event_laddr->sin_family = AF_INET;
+ cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
- cm_event.remote_addr.sin_family = AF_INET;
- cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
- cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+ cm_event_raddr->sin_family = AF_INET;
+ cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
+ cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
cm_event.private_data = cm_node->mpa_frame_buf;
cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
"remove_addr=%08x\n",
- cm_event.local_addr.sin_addr.s_addr,
- cm_event.remote_addr.sin_addr.s_addr);
+ cm_event_laddr->sin_addr.s_addr,
+ cm_event_raddr->sin_addr.s_addr);
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4a9af795b88f..1946101419a3 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -89,7 +89,6 @@ struct qlogic_ib_stats {
extern struct qlogic_ib_stats qib_stats;
extern const struct pci_error_handlers qib_pci_err_handler;
-extern struct pci_driver qib_driver;
#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
/*
@@ -576,11 +575,13 @@ struct qib_pportdata {
/* read/write using lock */
spinlock_t sdma_lock ____cacheline_aligned_in_smp;
struct list_head sdma_activelist;
+ struct list_head sdma_userpending;
u64 sdma_descq_added;
u64 sdma_descq_removed;
u16 sdma_descq_tail;
u16 sdma_descq_head;
u8 sdma_generation;
+ u8 sdma_intrequest;
struct tasklet_struct sdma_sw_clean_up_task
____cacheline_aligned_in_smp;
@@ -1326,6 +1327,8 @@ int qib_setup_sdma(struct qib_pportdata *);
void qib_teardown_sdma(struct qib_pportdata *);
void __qib_sdma_intr(struct qib_pportdata *);
void qib_sdma_intr(struct qib_pportdata *);
+void qib_user_sdma_send_desc(struct qib_pportdata *dd,
+ struct list_head *pktlist);
int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
u32, struct qib_verbs_txreq *);
/* ppd->sdma_lock should be locked before calling this. */
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 4f255b723ffd..5670ace27c63 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@ struct qib_base_info {
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define QIB_USER_SWMINOR 12
+#define QIB_USER_SWMINOR 13
#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
@@ -701,7 +701,37 @@ struct qib_message_header {
__be32 bth[3];
/* fields below this point are in host byte order */
struct qib_header iph;
+ /* fields below are simplified, but should match PSM */
+ /* some are accessed by the driver when packet splitting is needed */
__u8 sub_opcode;
+ __u8 flags;
+ __u16 commidx;
+ __u32 ack_seq_num;
+ __u8 flowid;
+ __u8 hdr_dlen;
+ __u16 mqhdr;
+ __u32 uwords[4];
+};
+
+/* sequence number bits for message */
+union qib_seqnum {
+ struct {
+ __u32 seq:11;
+ __u32 gen:8;
+ __u32 flow:5;
+ };
+ struct {
+ __u32 pkt:16;
+ __u32 msg:8;
+ };
+ __u32 val;
+};
+
+/* qib receiving-dma tid-session-member */
+struct qib_tid_session_member {
+ __u16 tid;
+ __u16 offset;
+ __u16 length;
};
/* IB - LRH header consts */
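
As a sanity check on the new qib_seqnum union, the following user-space sketch (not part of the patch; it assumes the usual little-endian bitfield layout that PSM and the driver rely on, with kernel types swapped for stdint) shows how the tid-sdma view (seq/gen/flow) and the plain-sdma view (pkt/msg) alias the same low 24 bits of val:

#include <stdint.h>
#include <stdio.h>

union qib_seqnum {		/* illustrative copy of the definition above */
	struct {
		uint32_t seq:11;
		uint32_t gen:8;
		uint32_t flow:5;
	};
	struct {
		uint32_t pkt:16;
		uint32_t msg:8;
	};
	uint32_t val;
};

int main(void)
{
	union qib_seqnum s = { .val = 0 };

	s.seq = 5;		/* tid-sdma packet sequence */
	s.gen = 1;		/* generation */
	s.flow = 2;		/* flow id */
	/* the same storage read back through the plain-sdma view */
	printf("val=0x%06x pkt=%u msg=%u\n", s.val, s.pkt, s.msg);
	return 0;
}
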
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index b51a51486cb8..275f247f9fca 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1220,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor)
return user_swminor == 3;
default:
/* >= 4 are compatible (or are expected to be) */
- return user_swminor >= 4;
+ return user_swminor <= QIB_USER_SWMINOR;
}
}
/* make no promises yet for future major versions */
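
The qib_file_ops.c hunk tightens the sub-context compatibility rule: instead of accepting any user minor >= 4, the driver now only accepts minors up to its own QIB_USER_SWMINOR (13 after this series). A rough sketch of the intent, assuming the major version has already been matched and ignoring the legacy minor 0..3 special cases handled earlier in that function:

/* illustrative only; the real logic lives in qib_compatible_subctxts() */
static int minor_is_compatible(int user_swminor)
{
	/*
	 * A user library built against a newer minor may rely on
	 * features this driver does not implement, so reject it;
	 * older (or equal) minors are expected to be compatible.
	 */
	return user_swminor <= QIB_USER_SWMINOR;
}
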
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 36e048e0e1d9..24e802f4ea2f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1193,7 +1193,7 @@ static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
-struct pci_driver qib_driver = {
+static struct pci_driver qib_driver = {
.name = QIB_DRV_NAME,
.probe = qib_init_one,
.remove = qib_remove_one,
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 57bd3fa016bc..28874f8606f8 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -415,7 +415,6 @@ struct cc_table_shadow {
struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
} __packed;
-#endif /* _QIB_MAD_H */
/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2.
@@ -428,3 +427,5 @@ struct cc_table_shadow {
COUNTER_MASK(1, 2) | \
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
+
+#endif /* _QIB_MAD_H */
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index c574ec7c85e6..3f14009fb662 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -283,12 +283,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
goto bail;
}
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX);
+ pos = dd->pcidev->msix_cap;
if (nent && *nent && pos) {
qib_msix_setup(dd, pos, nent, entry);
ret = 0; /* did it, either MSIx or INTx */
} else {
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (pos)
ret = qib_msi_setup(dd, pos);
else
@@ -357,7 +357,7 @@ int qib_reinit_intr(struct qib_devdata *dd)
if (!dd->msi_lo)
goto bail;
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (!pos) {
qib_dev_err(dd,
"Can't find MSI capability, can't restore MSI settings\n");
@@ -426,7 +426,7 @@ void qib_enable_intx(struct pci_dev *pdev)
if (new != cw)
pci_write_config_word(pdev, PCI_COMMAND, new);
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ pos = pdev->msi_cap;
if (pos) {
/* then turn off MSI */
pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
@@ -434,7 +434,7 @@ void qib_enable_intx(struct pci_dev *pdev)
if (new != cw)
pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
}
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+ pos = pdev->msix_cap;
if (pos) {
/* then turn off MSIx */
pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw);
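
The qib_pcie.c hunks replace capability-list walks with the offsets the PCI core already caches in struct pci_dev. A minimal sketch of the equivalence being relied on (kernel context assumed; a value of 0 means the capability is absent in both cases):

#include <linux/pci.h>

/* illustrative: both expressions yield the MSI capability offset, or 0 */
static void show_msi_cap_equivalence(struct pci_dev *pdev)
{
	int walked = pci_find_capability(pdev, PCI_CAP_ID_MSI);
	u8  cached = pdev->msi_cap;	/* filled in at enumeration time */

	WARN_ON(walked != cached);
}
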
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 9b5322d8cd5a..c6d6a54d2e19 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -423,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd)
void __qib_sdma_intr(struct qib_pportdata *ppd)
{
- if (__qib_sdma_running(ppd))
+ if (__qib_sdma_running(ppd)) {
qib_sdma_make_progress(ppd);
+ if (!list_empty(&ppd->sdma_userpending))
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ }
}
int qib_setup_sdma(struct qib_pportdata *ppd)
@@ -452,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd)
ppd->sdma_descq_removed = 0;
ppd->sdma_descq_added = 0;
+ ppd->sdma_intrequest = 0;
+ INIT_LIST_HEAD(&ppd->sdma_userpending);
+
INIT_LIST_HEAD(&ppd->sdma_activelist);
tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 82442085cbe6..d0a0ea0c14d6 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -53,20 +53,36 @@
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
struct qib_user_sdma_pkt {
- u8 naddr; /* dimension of addr (1..3) ... */
+ struct list_head list; /* list element */
+
+ u8 tiddma; /* if this is NEW tid-sdma */
+ u8 largepkt; /* this is large pkt from kmalloc */
+ u16 frag_size; /* frag size used by PSM */
+ u16 index; /* last header index or push index */
+ u16 naddr; /* dimension of addr (1..3) ... */
+ u16 addrlimit; /* addr array size */
+ u16 tidsmidx; /* current tidsm index */
+ u16 tidsmcount; /* tidsm array item count */
+ u16 payload_size; /* payload size so far for header */
+ u32 bytes_togo; /* bytes for processing */
u32 counter; /* sdma pkts queued counter for this entry */
+ struct qib_tid_session_member *tidsm; /* tid session member array */
+ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
u64 added; /* global descq number of entries */
struct {
- u32 offset; /* offset for kvaddr, addr */
- u32 length; /* length in page */
- u8 put_page; /* should we put_page? */
- u8 dma_mapped; /* is page dma_mapped? */
+ u16 offset; /* offset for kvaddr, addr */
+ u16 length; /* length in page */
+ u16 first_desc; /* first desc */
+ u16 last_desc; /* last desc */
+ u16 put_page; /* should we put_page? */
+ u16 dma_mapped; /* is page dma_mapped? */
+ u16 dma_length; /* for dma_unmap_page() */
+ u16 padding;
struct page *page; /* may be NULL (coherent mem) */
void *kvaddr; /* FIXME: only for pio hack */
dma_addr_t addr;
} addr[4]; /* max pages, any more and we coalesce */
- struct list_head list; /* list element */
};
struct qib_user_sdma_queue {
@@ -77,6 +93,12 @@ struct qib_user_sdma_queue {
*/
struct list_head sent;
+ /*
+ * Because the above list will be accessed by both the process
+ * and the signal handler, we need a spinlock for it.
+ */
+ spinlock_t sent_lock ____cacheline_aligned_in_smp;
+
/* headers with expected length are allocated from here... */
char header_cache_name[64];
struct dma_pool *header_cache;
@@ -88,6 +110,12 @@ struct qib_user_sdma_queue {
/* as packets go on the queued queue, they are counted... */
u32 counter;
u32 sent_counter;
+ /* pending packets, not sending yet */
+ u32 num_pending;
+ /* sending packets, not complete yet */
+ u32 num_sending;
+ /* global descq number of entry of last sending packet */
+ u64 added;
/* dma page table */
struct rb_root dma_pages_root;
@@ -107,8 +135,12 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->counter = 0;
pq->sent_counter = 0;
- INIT_LIST_HEAD(&pq->sent);
+ pq->num_pending = 0;
+ pq->num_sending = 0;
+ pq->added = 0;
+ INIT_LIST_HEAD(&pq->sent);
+ spin_lock_init(&pq->sent_lock);
mutex_init(&pq->lock);
snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
@@ -144,34 +176,310 @@ done:
}
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
- int i, size_t offset, size_t len,
- int put_page, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+ int i, u16 offset, u16 len,
+ u16 first_desc, u16 last_desc,
+ u16 put_page, u16 dma_mapped,
+ struct page *page, void *kvaddr,
+ dma_addr_t dma_addr, u16 dma_length)
{
pkt->addr[i].offset = offset;
pkt->addr[i].length = len;
+ pkt->addr[i].first_desc = first_desc;
+ pkt->addr[i].last_desc = last_desc;
pkt->addr[i].put_page = put_page;
pkt->addr[i].dma_mapped = dma_mapped;
pkt->addr[i].page = page;
pkt->addr[i].kvaddr = kvaddr;
pkt->addr[i].addr = dma_addr;
+ pkt->addr[i].dma_length = dma_length;
}
-static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt,
- u32 counter, size_t offset,
- size_t len, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ size_t len, dma_addr_t *dma_addr)
{
- pkt->naddr = 1;
- pkt->counter = counter;
- qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
- kvaddr, dma_addr);
+ void *hdr;
+
+ if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+ hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ dma_addr);
+ else
+ hdr = NULL;
+
+ if (!hdr) {
+ hdr = kmalloc(len, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ *dma_addr = 0;
+ }
+
+ return hdr;
+}
+
+static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ struct page *page, u16 put,
+ u16 offset, u16 len, void *kvaddr)
+{
+ __le16 *pbc16;
+ void *pbcvaddr;
+ struct qib_message_header *hdr;
+ u16 newlen, pbclen, lastdesc, dma_mapped;
+ u32 vcto;
+ union qib_seqnum seqnum;
+ dma_addr_t pbcdaddr;
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ page, offset, len, DMA_TO_DEVICE);
+ int ret = 0;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ /*
+ * dma mapping error; the pkt has not taken over
+ * this page yet, so release the page here and let
+ * the caller ignore it.
+ */
+ if (put) {
+ put_page(page);
+ } else {
+ /* coalesce case */
+ kunmap(page);
+ __free_page(page);
+ }
+ ret = -ENOMEM;
+ goto done;
+ }
+ offset = 0;
+ dma_mapped = 1;
+
+
+next_fragment:
+
+ /*
+ * In tid-sdma, the transfer length is restricted by
+ * receiver side current tid page length.
+ */
+ if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
+ newlen = pkt->tidsm[pkt->tidsmidx].length;
+ else
+ newlen = len;
+
+ /*
+ * Then the transfer length is restricted by MTU.
+ * The last descriptor flag is determined by:
+ * 1. the current packet is at frag size length.
+ * 2. the current tid page is done if tid-sdma.
+ * 3. there are no more bytes to go if sdma.
+ */
+ lastdesc = 0;
+ if ((pkt->payload_size + newlen) >= pkt->frag_size) {
+ newlen = pkt->frag_size - pkt->payload_size;
+ lastdesc = 1;
+ } else if (pkt->tiddma) {
+ if (newlen == pkt->tidsm[pkt->tidsmidx].length)
+ lastdesc = 1;
+ } else {
+ if (newlen == pkt->bytes_togo)
+ lastdesc = 1;
+ }
+
+ /* fill the next fragment in this page */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ offset, newlen, /* offset, len */
+ 0, lastdesc, /* first last desc */
+ put, dma_mapped, /* put page, dma mapped */
+ page, kvaddr, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->bytes_togo -= newlen;
+ pkt->payload_size += newlen;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* If there are no more bytes to go (lastdesc==1). */
+ if (pkt->bytes_togo == 0) {
+ /* The packet is done, but the header is not dma mapped yet;
+ * it should be from kmalloc. */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ goto done;
+ }
+
+ /* If tid-sdma, advance tid info. */
+ if (pkt->tiddma) {
+ pkt->tidsm[pkt->tidsmidx].length -= newlen;
+ if (pkt->tidsm[pkt->tidsmidx].length) {
+ pkt->tidsm[pkt->tidsmidx].offset += newlen;
+ } else {
+ pkt->tidsmidx++;
+ if (pkt->tidsmidx == pkt->tidsmcount) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * If this is NOT the last descriptor (newlen==len),
+ * the current packet is not done yet, but the current
+ * send side page is done.
+ */
+ if (lastdesc == 0)
+ goto done;
+
+ /*
+ * If running this driver under PSM with message size
+ * fitting into one transfer unit, it is not possible
+ * to reach this line; otherwise, it is a bug.
+ */
+
+ /*
+ * Since the current packet is done, and there are more
+ * bytes to go, we need to create a new sdma header, copying
+ * from the previous sdma header and then modify both.
+ */
+ pbclen = pkt->addr[pkt->index].length;
+ pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
+ if (!pbcvaddr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Copy the previous sdma header to new sdma header */
+ pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
+ memcpy(pbcvaddr, pbc16, pbclen);
+
+ /* Modify the previous sdma header */
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* turn on the header suppression */
+ hdr->iph.pkt_flags =
+ cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
+ /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
+ hdr->flags &= ~(0x04|0x20);
+ } else {
+ /* turn off extra bytes: 20-21 bits */
+ hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
+ /* turn off ACK_REQ: 0x04 */
+ hdr->flags &= ~(0x04);
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* The packet is done, but the header is not dma mapped yet;
+ * it should be from kmalloc. */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ /* Modify the new sdma header */
+ pbc16 = (__le16 *)pbcvaddr;
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* Set new tid and offset for new sdma header */
+ hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
+ (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
+ (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
+ (pkt->tidsm[pkt->tidsmidx].offset>>2));
+ } else {
+ /* Middle protocol new packet offset */
+ hdr->uwords[2] += pkt->payload_size;
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* Next sequence number in new sdma header */
+ seqnum.val = be32_to_cpu(hdr->bth[2]);
+ if (pkt->tiddma)
+ seqnum.seq++;
+ else
+ seqnum.pkt++;
+ hdr->bth[2] = cpu_to_be32(seqnum.val);
+
+ /* Init new sdma header. */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ 0, pbclen, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbcvaddr, /* struct page, virt addr */
+ pbcdaddr, pbclen); /* dma addr, dma length */
+ pkt->index = pkt->naddr;
+ pkt->payload_size = 0;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* Prepare for next fragment in this page */
+ if (newlen != len) {
+ if (dma_mapped) {
+ put = 0;
+ dma_mapped = 0;
+ page = NULL;
+ kvaddr = NULL;
+ }
+ len -= newlen;
+ offset += newlen;
+
+ goto next_fragment;
+ }
+
+done:
+ return ret;
}
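
qib_user_sdma_page_to_frags() above recomputes the KDETH checksum with the same expression for both the old and the new header. A hypothetical helper (not in the patch) makes the arithmetic explicit: QIB_LRH_BTH plus the new LRH packet length, minus both 16-bit halves of ver_ctxt_tid_offset and the KDETH packet flags.

/* hypothetical helper mirroring the checksum arithmetic used above */
static __le16 qib_user_sdma_kdeth_chksum(const struct qib_message_header *hdr)
{
	u32 vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);

	return cpu_to_le16(QIB_LRH_BTH +
			   be16_to_cpu(hdr->lrh[2]) -
			   ((vcto >> 16) & 0xFFFF) - (vcto & 0xFFFF) -
			   le16_to_cpu(hdr->iph.pkt_flags));
}
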
/* we've too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov)
@@ -182,7 +490,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
char *mpage;
int i;
int len = 0;
- dma_addr_t dma_addr;
if (!page) {
ret = -ENOMEM;
@@ -205,17 +512,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
len += iov[i].iov_len;
}
- dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
- ret = -ENOMEM;
- goto free_unmap;
- }
-
- qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
- dma_addr);
- pkt->naddr = 2;
-
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ page, 0, 0, len, mpage_save);
goto done;
free_unmap:
@@ -238,16 +536,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/*
- * Truncate length to page boundary.
- */
-static int qib_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
- const unsigned long offset = addr & ~PAGE_MASK;
-
- return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
static void qib_user_sdma_free_pkt_frag(struct device *dev,
struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
@@ -256,10 +544,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
const int i = frag;
if (pkt->addr[i].page) {
+ /* only user data has page */
if (pkt->addr[i].dma_mapped)
dma_unmap_page(dev,
pkt->addr[i].addr,
- pkt->addr[i].length,
+ pkt->addr[i].dma_length,
DMA_TO_DEVICE);
if (pkt->addr[i].kvaddr)
@@ -269,55 +558,81 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
put_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
- } else if (pkt->addr[i].kvaddr)
- /* free coherent mem from cache... */
- dma_pool_free(pq->header_cache,
+ } else if (pkt->addr[i].kvaddr) {
+ /* for headers */
+ if (pkt->addr[i].dma_mapped) {
+ /* from kmalloc & dma mapped */
+ dma_unmap_single(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+ kfree(pkt->addr[i].kvaddr);
+ } else if (pkt->addr[i].addr) {
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
pkt->addr[i].kvaddr, pkt->addr[i].addr);
+ } else {
+ /* from kmalloc but not dma mapped */
+ kfree(pkt->addr[i].kvaddr);
+ }
+ }
}
/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
unsigned long addr, int tlen, int npages)
{
- struct page *pages[2];
- int j;
- int ret;
-
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
- if (ret != npages) {
- int i;
-
- for (i = 0; i < ret; i++)
- put_page(pages[i]);
-
- ret = -ENOMEM;
- goto done;
- }
+ struct page *pages[8];
+ int i, j;
+ int ret = 0;
- for (j = 0; j < npages; j++) {
- /* map the pages... */
- const int flen = qib_user_sdma_page_length(addr, tlen);
- dma_addr_t dma_addr =
- dma_map_page(&dd->pcidev->dev,
- pages[j], 0, flen, DMA_TO_DEVICE);
- unsigned long fofs = addr & ~PAGE_MASK;
+ while (npages) {
+ if (npages > 8)
+ j = 8;
+ else
+ j = npages;
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = get_user_pages(current, current->mm, addr,
+ j, 0, 1, pages, NULL);
+ if (ret != j) {
+ i = 0;
+ j = ret;
ret = -ENOMEM;
- goto done;
+ goto free_pages;
}
- qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
- pages[j], kmap(pages[j]), dma_addr);
+ for (i = 0; i < j; i++) {
+ /* map the pages... */
+ unsigned long fofs = addr & ~PAGE_MASK;
+ int flen = ((fofs + tlen) > PAGE_SIZE) ?
+ (PAGE_SIZE - fofs) : tlen;
+
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ pages[i], 1, fofs, flen, NULL);
+ if (ret < 0) {
+ /* the current page has been taken
+ * care of inside the above call.
+ */
+ i++;
+ goto free_pages;
+ }
- pkt->naddr++;
- addr += flen;
- tlen -= flen;
+ addr += flen;
+ tlen -= flen;
+ }
+
+ npages -= j;
}
+ goto done;
+
+ /* if error, return all pages not managed by pkt */
+free_pages:
+ while (i < j)
+ put_page(pages[i++]);
+
done:
return ret;
}
@@ -335,7 +650,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
const int npages = qib_user_sdma_num_pages(iov + idx);
const unsigned long addr = (unsigned long) iov[idx].iov_base;
- ret = qib_user_sdma_pin_pages(dd, pkt, addr,
+ ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
iov[idx].iov_len, npages);
if (ret < 0)
goto free_pkt;
@@ -344,9 +659,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
goto done;
free_pkt:
- for (idx = 0; idx < pkt->naddr; idx++)
+ /* we need to ignore the first entry here */
+ for (idx = 1; idx < pkt->naddr; idx++)
qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+ /* We need to dma unmap the first entry, to restore it to
+ * the original state so that the caller can free the memory on
+ * error; the caller does not know whether it was dma mapped or not. */
+ if (pkt->addr[0].dma_mapped) {
+ dma_unmap_single(&dd->pcidev->dev,
+ pkt->addr[0].addr,
+ pkt->addr[0].dma_length,
+ DMA_TO_DEVICE);
+ pkt->addr[0].addr = 0;
+ pkt->addr[0].dma_mapped = 0;
+ }
+
done:
return ret;
}
@@ -359,8 +687,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
{
int ret = 0;
- if (npages >= ARRAY_SIZE(pkt->addr))
- ret = qib_user_sdma_coalesce(dd, pkt, iov, niov);
+ if (pkt->frag_size == pkt->bytes_togo &&
+ npages >= ARRAY_SIZE(pkt->addr))
+ ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
else
ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
@@ -380,7 +709,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
for (i = 0; i < pkt->naddr; i++)
qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
- kmem_cache_free(pq->pkt_slab, pkt);
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
}
INIT_LIST_HEAD(list);
}
@@ -393,63 +725,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
* as, if there is an error we clean it...
*/
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq,
- struct list_head *list,
const struct iovec *iov,
unsigned long niov,
- int maxpkts)
+ struct list_head *list,
+ int *maxpkts, int *ndesc)
{
unsigned long idx = 0;
int ret = 0;
int npkts = 0;
- struct page *page = NULL;
__le32 *pbc;
dma_addr_t dma_addr;
struct qib_user_sdma_pkt *pkt = NULL;
size_t len;
size_t nw;
u32 counter = pq->counter;
- int dma_mapped = 0;
+ u16 frag_size;
- while (idx < niov && npkts < maxpkts) {
+ while (idx < niov && npkts < *maxpkts) {
const unsigned long addr = (unsigned long) iov[idx].iov_base;
const unsigned long idx_save = idx;
unsigned pktnw;
unsigned pktnwc;
int nfrags = 0;
int npages = 0;
+ int bytes_togo = 0;
+ int tiddma = 0;
int cfur;
- dma_mapped = 0;
len = iov[idx].iov_len;
nw = len >> 2;
- page = NULL;
-
- pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
- if (!pkt) {
- ret = -ENOMEM;
- goto free_list;
- }
if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
len > PAGE_SIZE || len & 3 || addr & 3) {
ret = -EINVAL;
- goto free_pkt;
+ goto free_list;
}
- if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
- pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
- &dma_addr);
- else
- pbc = NULL;
-
+ pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
if (!pbc) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto free_pkt;
- }
- pbc = kmap(page);
+ ret = -ENOMEM;
+ goto free_list;
}
cfur = copy_from_user(pbc, iov[idx].iov_base, len);
@@ -474,8 +791,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
* we can verify that the packet is consistent with the
* iovec lengths.
*/
- pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK;
- if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ pktnw = le32_to_cpu(*pbc) & 0xFFFF;
+ if (pktnw < pktnwc) {
ret = -EINVAL;
goto free_pbc;
}
@@ -486,17 +803,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
const unsigned long faddr =
(unsigned long) iov[idx].iov_base;
- if (slen & 3 || faddr & 3 || !slen ||
- slen > PAGE_SIZE) {
+ if (slen & 3 || faddr & 3 || !slen) {
ret = -EINVAL;
goto free_pbc;
}
- npages++;
- if ((faddr & PAGE_MASK) !=
- ((faddr + slen - 1) & PAGE_MASK))
- npages++;
+ npages += qib_user_sdma_num_pages(&iov[idx]);
+ bytes_togo += slen;
pktnwc += slen >> 2;
idx++;
nfrags++;
@@ -507,48 +821,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
goto free_pbc;
}
- if (page) {
- dma_addr = dma_map_page(&dd->pcidev->dev,
- page, 0, len, DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
+ if (((frag_size ? frag_size : bytes_togo) + len) >
+ ppd->ibmaxlen) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (frag_size) {
+ int pktsize, tidsmsize, n;
+
+ n = npages*((2*PAGE_SIZE/frag_size)+1);
+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;
+
+ /*
+ * Determine if this is tid-sdma or just sdma.
+ */
+ tiddma = (((le32_to_cpu(pbc[7])>>
+ QLOGIC_IB_I_TID_SHIFT)&
+ QLOGIC_IB_I_TID_MASK) !=
+ QLOGIC_IB_I_TID_MASK);
+
+ if (tiddma)
+ tidsmsize = iov[idx].iov_len;
+ else
+ tidsmsize = 0;
+
+ pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
+ if (!pkt) {
ret = -ENOMEM;
goto free_pbc;
}
+ pkt->largepkt = 1;
+ pkt->frag_size = frag_size;
+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
+
+ if (tiddma) {
+ char *tidsm = (char *)pkt + pktsize;
+ cfur = copy_from_user(tidsm,
+ iov[idx].iov_base, tidsmsize);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pkt;
+ }
+ pkt->tidsm =
+ (struct qib_tid_session_member *)tidsm;
+ pkt->tidsmcount = tidsmsize/
+ sizeof(struct qib_tid_session_member);
+ pkt->tidsmidx = 0;
+ idx++;
+ }
- dma_mapped = 1;
+ /*
+ * The pbc 'fill1' field is borrowed to pass the frag size;
+ * we need to clear it after picking up the frag size, since
+ * the hardware requires this field to be zero.
+ */
+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
+ } else {
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+ pkt->largepkt = 0;
+ pkt->frag_size = bytes_togo;
+ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
}
-
- qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
- page, pbc, dma_addr);
+ pkt->bytes_togo = bytes_togo;
+ pkt->payload_size = 0;
+ pkt->counter = counter;
+ pkt->tiddma = tiddma;
+
+ /* setup the first header */
+ qib_user_sdma_init_frag(pkt, 0, /* index */
+ 0, len, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbc, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->index = 0;
+ pkt->naddr = 1;
if (nfrags) {
ret = qib_user_sdma_init_payload(dd, pq, pkt,
iov + idx_save + 1,
nfrags, npages);
if (ret < 0)
- goto free_pbc_dma;
+ goto free_pkt;
+ } else {
+ /* since there is no payload, mark the
+ * header as the last desc. */
+ pkt->addr[0].last_desc = 1;
+
+ if (dma_addr == 0) {
+ /*
+ * the header is not dma mapped yet.
+ * it should be from kmalloc.
+ */
+ dma_addr = dma_map_single(&dd->pcidev->dev,
+ pbc, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pkt->addr[0].addr = dma_addr;
+ pkt->addr[0].dma_mapped = 1;
+ }
}
counter++;
npkts++;
+ pkt->pq = pq;
+ pkt->index = 0; /* reset index for push on hw */
+ *ndesc += pkt->naddr;
list_add_tail(&pkt->list, list);
}
+ *maxpkts = npkts;
ret = idx;
goto done;
-free_pbc_dma:
- if (dma_mapped)
- dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pkt:
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
- if (page) {
- kunmap(page);
- __free_page(page);
- } else
+ if (dma_addr)
dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
- kmem_cache_free(pq->pkt_slab, pkt);
+ else
+ kfree(pbc);
free_list:
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
@@ -569,10 +974,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
struct list_head free_list;
struct qib_user_sdma_pkt *pkt;
struct qib_user_sdma_pkt *pkt_prev;
+ unsigned long flags;
int ret = 0;
+ if (!pq->num_sending)
+ return 0;
+
INIT_LIST_HEAD(&free_list);
+ /*
+ * We need this spin lock here because the interrupt handler
+ * might modify this list in qib_user_sdma_send_desc(); also,
+ * we must not be interrupted while holding it, or we deadlock.
+ */
+ spin_lock_irqsave(&pq->sent_lock, flags);
list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
s64 descd = ppd->sdma_descq_removed - pkt->added;
@@ -583,7 +998,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
/* one more packet cleaned */
ret++;
+ pq->num_sending--;
}
+ spin_unlock_irqrestore(&pq->sent_lock, flags);
if (!list_empty(&free_list)) {
u32 counter;
@@ -627,6 +1044,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq)
{
struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
int i;
if (!pq)
@@ -634,7 +1052,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
mutex_lock(&pq->lock);
- if (list_empty(&pq->sent)) {
+ if (!pq->num_pending && !pq->num_sending) {
mutex_unlock(&pq->lock);
break;
}
@@ -644,29 +1062,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
msleep(10);
}
- if (!list_empty(&pq->sent)) {
+ if (pq->num_pending || pq->num_sending) {
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
struct list_head free_list;
+ mutex_lock(&pq->lock);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /*
+ * Since we hold sdma_lock, it is safe without sent_lock.
+ */
+ if (pq->num_pending) {
+ list_for_each_entry_safe(pkt, pkt_prev,
+ &ppd->sdma_userpending, list) {
+ if (pkt->pq == pq) {
+ list_move_tail(&pkt->list, &pq->sent);
+ pq->num_pending--;
+ pq->num_sending++;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
INIT_LIST_HEAD(&free_list);
- mutex_lock(&pq->lock);
list_splice_init(&pq->sent, &free_list);
+ pq->num_sending = 0;
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
mutex_unlock(&pq->lock);
}
}
-static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd,
+static inline __le64 qib_sdma_make_desc0(u8 gen,
u64 addr, u64 dwlen, u64 dwoffset)
{
- u8 tmpgen;
-
- tmpgen = ppd->sdma_generation;
-
return cpu_to_le64(/* SDmaPhyAddr[31:0] */
((addr & 0xfffffffcULL) << 32) |
/* SDmaGeneration[1:0] */
- ((tmpgen & 3ULL) << 30) |
+ ((gen & 3ULL) << 30) |
/* SDmaDwordCount[10:0] */
((dwlen & 0x7ffULL) << 16) |
/* SDmaBufOffset[12:2] */
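
For orientation, a hypothetical decoder for the descriptor word built above, using the same bit positions as the encode (physical address in the upper half with bits 1:0 assumed clear, generation in bits 31:30, dword count in bits 26:16, buffer offset in bits 10:0):

/* hypothetical inverse of qib_sdma_make_desc0(), for illustration only */
static void qib_sdma_unpack_desc0(__le64 desc0, u64 *addr, u8 *gen,
				  u16 *dwlen, u16 *dwoffset)
{
	u64 d = le64_to_cpu(desc0);

	*addr     = (d >> 32) & 0xfffffffcULL;	/* SDmaPhyAddr[31:0] */
	*gen      = (d >> 30) & 3;		/* SDmaGeneration[1:0] */
	*dwlen    = (d >> 16) & 0x7ff;		/* SDmaDwordCount[10:0] */
	*dwoffset = d & 0x7ff;			/* SDmaBufOffset[12:2] */
}
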
@@ -692,7 +1125,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr)
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
struct qib_user_sdma_pkt *pkt, int idx,
- unsigned ofs, u16 tail)
+ unsigned ofs, u16 tail, u8 gen)
{
const u64 addr = (u64) pkt->addr[idx].addr +
(u64) pkt->addr[idx].offset;
@@ -702,104 +1135,132 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
descqp = &ppd->sdma_descq[tail].qw[0];
- descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs);
- if (idx == 0)
+ descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
+ if (pkt->addr[idx].first_desc)
descq0 = qib_sdma_make_first_desc0(descq0);
- if (idx == pkt->naddr - 1)
+ if (pkt->addr[idx].last_desc) {
descq0 = qib_sdma_make_last_desc0(descq0);
+ if (ppd->sdma_intrequest) {
+ descq0 |= cpu_to_le64(1ULL << 15);
+ ppd->sdma_intrequest = 0;
+ }
+ }
descqp[0] = descq0;
descqp[1] = qib_sdma_make_desc1(addr);
}
-/* pq->lock must be held, get packets on the wire... */
-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
- struct qib_user_sdma_queue *pq,
- struct list_head *pktlist)
+void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
+ struct list_head *pktlist)
{
struct qib_devdata *dd = ppd->dd;
- int ret = 0;
- unsigned long flags;
- u16 tail;
- u8 generation;
- u64 descq_added;
-
- if (list_empty(pktlist))
- return 0;
+ u16 nfree, nsent;
+ u16 tail, tail_c;
+ u8 gen, gen_c;
- if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
- return -ECOMM;
-
- spin_lock_irqsave(&ppd->sdma_lock, flags);
-
- /* keep a copy for restoring purposes in case of problems */
- generation = ppd->sdma_generation;
- descq_added = ppd->sdma_descq_added;
-
- if (unlikely(!__qib_sdma_running(ppd))) {
- ret = -ECOMM;
- goto unlock;
- }
+ nfree = qib_sdma_descq_freecnt(ppd);
+ if (!nfree)
+ return;
- tail = ppd->sdma_descq_tail;
+retry:
+ nsent = 0;
+ tail_c = tail = ppd->sdma_descq_tail;
+ gen_c = gen = ppd->sdma_generation;
while (!list_empty(pktlist)) {
struct qib_user_sdma_pkt *pkt =
list_entry(pktlist->next, struct qib_user_sdma_pkt,
list);
- int i;
+ int i, j, c = 0;
unsigned ofs = 0;
u16 dtail = tail;
- if (pkt->naddr > qib_sdma_descq_freecnt(ppd))
- goto unlock_check_tail;
-
- for (i = 0; i < pkt->naddr; i++) {
- qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail);
+ for (i = pkt->index; i < pkt->naddr && nfree; i++) {
+ qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
ofs += pkt->addr[i].length >> 2;
if (++tail == ppd->sdma_descq_cnt) {
tail = 0;
- ++ppd->sdma_generation;
+ ++gen;
+ ppd->sdma_intrequest = 1;
+ } else if (tail == (ppd->sdma_descq_cnt>>1)) {
+ ppd->sdma_intrequest = 1;
}
- }
+ nfree--;
+ if (pkt->addr[i].last_desc == 0)
+ continue;
- if ((ofs << 2) > ppd->ibmaxlen) {
- ret = -EMSGSIZE;
- goto unlock;
- }
-
- /*
- * If the packet is >= 2KB mtu equivalent, we have to use
- * the large buffers, and have to mark each descriptor as
- * part of a large buffer packet.
- */
- if (ofs > dd->piosize2kmax_dwords) {
- for (i = 0; i < pkt->naddr; i++) {
- ppd->sdma_descq[dtail].qw[0] |=
- cpu_to_le64(1ULL << 14);
- if (++dtail == ppd->sdma_descq_cnt)
- dtail = 0;
+ /*
+ * If the packet is >= 2KB mtu equivalent, we
+ * have to use the large buffers, and have to
+ * mark each descriptor as part of a large
+ * buffer packet.
+ */
+ if (ofs > dd->piosize2kmax_dwords) {
+ for (j = pkt->index; j <= i; j++) {
+ ppd->sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == ppd->sdma_descq_cnt)
+ dtail = 0;
+ }
}
+ c += i + 1 - pkt->index;
+ pkt->index = i + 1; /* index for next first */
+ tail_c = dtail = tail;
+ gen_c = gen;
+ ofs = 0; /* reset for next packet */
}
- ppd->sdma_descq_added += pkt->naddr;
- pkt->added = ppd->sdma_descq_added;
- list_move_tail(&pkt->list, &pq->sent);
- ret++;
+ ppd->sdma_descq_added += c;
+ nsent += c;
+ if (pkt->index == pkt->naddr) {
+ pkt->added = ppd->sdma_descq_added;
+ pkt->pq->added = pkt->added;
+ pkt->pq->num_pending--;
+ spin_lock(&pkt->pq->sent_lock);
+ pkt->pq->num_sending++;
+ list_move_tail(&pkt->list, &pkt->pq->sent);
+ spin_unlock(&pkt->pq->sent_lock);
+ }
+ if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
+ break;
}
-unlock_check_tail:
/* advance the tail on the chip if necessary */
- if (ppd->sdma_descq_tail != tail)
- dd->f_sdma_update_tail(ppd, tail);
+ if (ppd->sdma_descq_tail != tail_c) {
+ ppd->sdma_generation = gen_c;
+ dd->f_sdma_update_tail(ppd, tail_c);
+ }
-unlock:
- if (unlikely(ret < 0)) {
- ppd->sdma_generation = generation;
- ppd->sdma_descq_added = descq_added;
+ if (nfree && !list_empty(pktlist))
+ goto retry;
+
+ return;
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *pktlist, int count)
+{
+ int ret = 0;
+ unsigned long flags;
+
+ if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+ return -ECOMM;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ ret = -ECOMM;
+ goto unlock;
}
- spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+
+unlock:
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
return ret;
}
@@ -822,19 +1283,23 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
if (!qib_sdma_running(ppd))
goto done_unlock;
- if (ppd->sdma_descq_added != ppd->sdma_descq_removed) {
+ /* if we have packets that are not complete yet */
+ if (pq->added > ppd->sdma_descq_removed)
qib_user_sdma_hwqueue_clean(ppd);
+ /* if we have completed packets to be freed */
+ if (pq->num_sending)
qib_user_sdma_queue_clean(ppd, pq);
- }
while (dim) {
- const int mxp = 8;
+ int mxp = 8;
+ int ndesc = 0;
down_write(&current->mm->mmap_sem);
- ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+ ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
+ iov, dim, &list, &mxp, &ndesc);
up_write(&current->mm->mmap_sem);
- if (ret <= 0)
+ if (ret < 0)
goto done_unlock;
else {
dim -= ret;
@@ -844,24 +1309,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
/* force packets onto the sdma hw queue... */
if (!list_empty(&list)) {
/*
- * Lazily clean hw queue. the 4 is a guess of about
- * how many sdma descriptors a packet will take (it
- * doesn't have to be perfect).
+ * Lazily clean hw queue.
*/
- if (qib_sdma_descq_freecnt(ppd) < ret * 4) {
+ if (qib_sdma_descq_freecnt(ppd) < ndesc) {
qib_user_sdma_hwqueue_clean(ppd);
- qib_user_sdma_queue_clean(ppd, pq);
+ if (pq->num_sending)
+ qib_user_sdma_queue_clean(ppd, pq);
}
- ret = qib_user_sdma_push_pkts(ppd, pq, &list);
+ ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
if (ret < 0)
goto done_unlock;
else {
- npkts += ret;
- pq->counter += ret;
-
- if (!list_empty(&list))
- goto done_unlock;
+ npkts += mxp;
+ pq->counter += mxp;
}
}
}
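
Taken together, the qib_user_sdma.c changes split packet accounting into two stages: packets first sit on ppd->sdma_userpending and are counted in pq->num_pending, then move to pq->sent (counted in pq->num_sending) once their descriptors are on the hardware queue. A condensed sketch of that lifecycle, with the locking elided and names as in the patch (illustrative only, not a drop-in replacement):

/* illustrative condensation of the accounting introduced above */
static void user_sdma_queue_for_hw(struct qib_pportdata *ppd,
				   struct qib_user_sdma_queue *pq,
				   struct list_head *pktlist, int count)
{
	pq->num_pending += count;	/* built, descriptors not pushed yet */
	list_splice_tail_init(pktlist, &ppd->sdma_userpending);
}

static void user_sdma_mark_on_hw(struct qib_user_sdma_pkt *pkt)
{
	pkt->pq->num_pending--;		/* descriptors are on the descq now */
	pkt->pq->num_sending++;		/* cleaned up once completion is seen */
	list_move_tail(&pkt->list, &pkt->pq->sent);
}
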
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3eceb61e3532..7a3175400b2a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -817,7 +817,6 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
tx->neigh = NULL;
@@ -1234,7 +1233,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
tx->neigh = NULL;
@@ -1325,7 +1323,6 @@ static void ipoib_cm_tx_start(struct work_struct *work)
neigh = p->neigh;
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
}
list_del(&p->list);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c6f71a88c55c..82cec1af902c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -493,7 +493,6 @@ static void path_rec_completion(int status,
path,
neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
continue;
}
@@ -618,7 +617,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
goto err_drop;
}
@@ -639,7 +637,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
neigh->ah = NULL;
if (!path->query && path_rec_start(dev, path))
- goto err_list;
+ goto err_path;
__skb_queue_tail(&neigh->queue, skb);
}
@@ -648,9 +646,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
ipoib_neigh_put(neigh);
return;
-err_list:
- list_del(&neigh->list);
-
err_path:
ipoib_neigh_free(neigh);
err_drop:
@@ -1098,6 +1093,8 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
+ /* remove from parent list */
+ list_del(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
return;
} else {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2e84ef859c5b..705de7b40201 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -347,6 +347,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iscsi_iser_conn *iser_conn;
+ struct iscsi_session *session;
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
int error;
@@ -365,7 +366,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
}
ib_conn = ep->dd_data;
- if (iser_alloc_rx_descriptors(ib_conn))
+ session = conn->session;
+ if (iser_alloc_rx_descriptors(ib_conn, session))
return -ENOMEM;
/* binds the iSER connection retrieved from the previously
@@ -419,12 +421,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
struct Scsi_Host *shost;
- struct iser_conn *ib_conn;
+ struct iser_conn *ib_conn = NULL;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
return NULL;
shost->transportt = iscsi_iser_scsi_transport;
+ shost->cmd_per_lun = qdepth;
shost->max_lun = iscsi_max_lun;
shost->max_id = 0;
shost->max_channel = 0;
@@ -441,12 +444,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
ep ? ib_conn->device->ib_device->dma_device : NULL))
goto free_host;
- /*
- * we do not support setting can_queue cmd_per_lun from userspace yet
- * because we preallocate so many resources
- */
+ if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+ iser_info("cmds_max changed from %u to %u\n",
+ cmds_max, ISER_DEF_XMIT_CMDS_MAX);
+ cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+ }
+
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
- ISCSI_DEF_XMIT_CMDS_MAX, 0,
+ cmds_max, 0,
sizeof(struct iscsi_iser_task),
initial_cmdsn, 0);
if (!cls_session)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 4f069c0d4c04..67914027c614 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -78,14 +78,14 @@
#define iser_warn(fmt, arg...) \
do { \
- if (iser_debug_level > 1) \
+ if (iser_debug_level > 0) \
pr_warn(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
- if (iser_debug_level > 0) \
+ if (iser_debug_level > 1) \
pr_info(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
@@ -102,7 +102,13 @@
/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
-#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX
+#define ISER_DEF_XMIT_CMDS_DEFAULT 512
+#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
+ #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX
+#else
+ #define ISER_DEF_XMIT_CMDS_MAX ISER_DEF_XMIT_CMDS_DEFAULT
+#endif
+#define ISER_DEF_CMD_PER_LUN ISER_DEF_XMIT_CMDS_MAX
/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
@@ -111,9 +117,9 @@
#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
* SCSI_TMFUNC(2), LOGOUT(1) */
-#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX)
-#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2)
/* the max TX (send) WR supported by the iSER QP is defined by *
* max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -123,7 +129,7 @@
#define ISER_INFLIGHT_DATAOUTS 8
-#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+#define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \
(1 + ISER_INFLIGHT_DATAOUTS) + \
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
@@ -205,7 +211,7 @@ struct iser_mem_reg {
u64 va;
u64 len;
void *mem_h;
- int is_fmr;
+ int is_mr;
};
struct iser_regd_buf {
@@ -246,6 +252,9 @@ struct iser_rx_desc {
#define ISER_MAX_CQ 4
+struct iser_conn;
+struct iscsi_iser_task;
+
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
@@ -259,6 +268,22 @@ struct iser_device {
int cq_active_qps[ISER_MAX_CQ];
int cqs_used;
struct iser_cq_desc *cq_desc;
+ int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+ unsigned cmds_max);
+ void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+ int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+ void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+};
+
+struct fast_reg_descriptor {
+ struct list_head list;
+ /* For fast registration - FRWR */
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ /* Valid for fast registration flag */
+ bool valid;
};
struct iser_conn {
@@ -270,13 +295,13 @@ struct iser_conn {
struct iser_device *device; /* device context */
struct rdma_cm_id *cma_id; /* CMA ID */
struct ib_qp *qp; /* QP */
- struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
wait_queue_head_t wait; /* waitq for conn/disconn */
+ unsigned qp_max_recv_dtos; /* num of rx buffers */
+ unsigned qp_max_recv_dtos_mask; /* above minus 1 */
+ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
- struct iser_page_vec *page_vec; /* represents SG to fmr maps*
- * maps serialized as tx is*/
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -285,6 +310,17 @@ struct iser_conn {
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ union {
+ struct {
+ struct ib_fmr_pool *pool; /* pool of IB FMRs */
+ struct iser_page_vec *page_vec; /* represents SG to fmr maps*
+ * maps serialized as tx is*/
+ } fmr;
+ struct {
+ struct list_head pool;
+ int pool_size;
+ } frwr;
+ } fastreg;
};
struct iscsi_iser_conn {
@@ -368,8 +404,10 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
@@ -380,7 +418,10 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
-void iser_unreg_mem(struct iser_mem_reg *mem_reg);
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -394,5 +435,9 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct iser_conn *ib_conn);
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_frwr_pool(struct iser_conn *ib_conn);
#endif
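
The iSER changes size the RX descriptor ring from the session's cmds_max and derive qp_max_recv_dtos_mask as cmds_max - 1, which only works because cmds_max is a power of two (as the comment in iser_alloc_rx_descriptors() notes). A tiny sketch of the assumed ring-index wrap, with a hypothetical helper name:

/* illustrative: wrap the rx ring index with the new mask (cmds_max == 2^N) */
static unsigned iser_next_rx_desc(unsigned rx_desc_head,
				  unsigned qp_max_recv_dtos_mask)
{
	return (rx_desc_head + 1) & qp_max_recv_dtos_mask;
}
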
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index b6d81a86c976..5f01da99ad66 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,6 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -69,7 +70,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -98,6 +99,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -119,7 +121,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -170,8 +172,78 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
}
}
+static void iser_free_login_buf(struct iser_conn *ib_conn)
+{
+ if (!ib_conn->login_buf)
+ return;
+
+ if (ib_conn->login_req_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ if (ib_conn->login_resp_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_resp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ kfree(ib_conn->login_buf);
+
+ /* make sure we never redo any unmapping */
+ ib_conn->login_req_dma = 0;
+ ib_conn->login_resp_dma = 0;
+ ib_conn->login_buf = NULL;
+}
+
+static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ int req_err, resp_err;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
+
+ ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+ ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!ib_conn->login_buf)
+ goto out_err;
+
+ ib_conn->login_req_buf = ib_conn->login_buf;
+ ib_conn->login_resp_buf = ib_conn->login_buf +
+ ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+ ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_req_buf,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_resp_buf,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ req_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_req_dma);
+ resp_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_resp_dma);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+ if (req_err || resp_err) {
+ if (req_err)
+ ib_conn->login_req_dma = 0;
+ if (resp_err)
+ ib_conn->login_resp_dma = 0;
+ goto free_login_buf;
+ }
+ return 0;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
+
+out_err:
+ iser_err("unable to alloc or map login buf\n");
+ return -ENOMEM;
+}
+
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
{
int i, j;
u64 dma_addr;
@@ -179,14 +251,24 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
struct ib_sge *rx_sg;
struct iser_device *device = ib_conn->device;
- ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+ ib_conn->qp_max_recv_dtos = session->cmds_max;
+ ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+ ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+
+ if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
+ goto create_rdma_reg_res_failed;
+
+ if (iser_alloc_login_buf(ib_conn))
+ goto alloc_login_buf_fail;
+
+ ib_conn->rx_descs = kmalloc(session->cmds_max *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!ib_conn->rx_descs)
goto rx_desc_alloc_fail;
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) {
dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -207,10 +289,14 @@ rx_desc_dma_map_failed:
rx_desc = ib_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
+ iser_free_login_buf(ib_conn);
+alloc_login_buf_fail:
+ device->iser_free_rdma_reg_res(ib_conn);
+create_rdma_reg_res_failed:
iser_err("failed allocating rx descriptors / data buffers\n");
return -ENOMEM;
}
@@ -222,13 +308,21 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
struct iser_device *device = ib_conn->device;
if (!ib_conn->rx_descs)
- return;
+ goto free_login_buf;
+
+ if (device->iser_free_rdma_reg_res)
+ device->iser_free_rdma_reg_res(ib_conn);
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
+ /* make sure we never redo any unmapping */
+ ib_conn->rx_descs = NULL;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
@@ -248,9 +342,10 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
- iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
+ iser_dbg("Initially post: %d\n", iser_conn->ib_conn->min_posted_rx);
/* Initial post receive buffers */
- if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+ if (iser_post_recvm(iser_conn->ib_conn,
+ iser_conn->ib_conn->min_posted_rx))
return -ENOMEM;
return 0;
@@ -487,9 +582,9 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
return;
outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
- count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
- ISER_MIN_POSTED_RX);
+ if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
+ count = min(ib_conn->qp_max_recv_dtos - outstanding,
+ ib_conn->min_posted_rx);
err = iser_post_recvm(ib_conn, count);
if (err)
iser_err("posting %d rx bufs err %d\n", count, err);
@@ -538,8 +633,8 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
int is_rdma_aligned = 1;
- struct iser_regd_buf *regd;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
@@ -553,17 +648,11 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
}
- if (iser_task->dir[ISER_DIR_IN]) {
- regd = &iser_task->rdma_regd[ISER_DIR_IN];
- if (regd->reg.is_fmr)
- iser_unreg_mem(&regd->reg);
- }
+ if (iser_task->dir[ISER_DIR_IN])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
- if (iser_task->dir[ISER_DIR_OUT]) {
- regd = &iser_task->rdma_regd[ISER_DIR_OUT];
- if (regd->reg.is_fmr)
- iser_unreg_mem(&regd->reg);
- }
+ if (iser_task->dir[ISER_DIR_OUT])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
/* if the data was unaligned, it was already unmapped and then copied */
if (is_rdma_aligned)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 7827baf455a1..1ce0c97d2ccb 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -170,8 +170,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
*/
static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct iser_page_vec *page_vec,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u64 *pages,
+ int *offset, int *data_size)
{
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
u64 start_addr, end_addr, page, chunk_start = 0;
@@ -180,7 +180,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
- page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+ *offset = (u64) sgl[0].offset & ~MASK_4K;
new_chunk = 1;
cur_page = 0;
@@ -204,13 +204,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
which might be unaligned */
page = chunk_start & MASK_4K;
do {
- page_vec->pages[cur_page++] = page;
+ pages[cur_page++] = page;
page += SIZE_4K;
} while (page < end_addr);
}
- page_vec->data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
+ *data_size = total_sz;
+ iser_dbg("page_vec->data_size:%d cur_page %d\n",
+ *data_size, cur_page);
return cur_page;
}
@@ -267,11 +268,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
struct scatterlist *sg;
int i;
- if (iser_debug_level == 0)
- return;
-
for_each_sg(sgl, sg, data->dma_nents, i)
- iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
+ iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
sg_page(sg), sg->offset,
@@ -298,8 +296,10 @@ static void iser_page_vec_build(struct iser_data_buf *data,
page_vec->offset = 0;
iser_dbg("Translating sg sz: %d\n", data->dma_nents);
- page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev);
- iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len);
+ page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
+ &page_vec->offset,
+ &page_vec->data_size);
+ iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
page_vec->length = page_vec_len;
@@ -347,16 +347,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
}
}
+static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
+ struct ib_device *ibdev,
+ enum iser_data_dir cmd_dir,
+ int aligned_len)
+{
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+
+ iscsi_conn->fmr_unalign_cnt++;
+ iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
+ aligned_len, mem->size);
+
+ if (iser_debug_level > 0)
+ iser_data_buf_dump(mem, ibdev);
+
+ /* unmap the command data before accessing it */
+ iser_dma_unmap_task_data(iser_task);
+
+ /* allocate copy buf, if we are writing, copy the */
+ /* unaligned scatterlist, dma map the copy */
+ if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
/**
- * iser_reg_rdma_mem - Registers memory intended for RDMA,
- * obtaining rkey and va
+ * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
+ * using FMR (if possible) obtaining rkey and va
*
* returns 0 on success, errno code on failure
*/
-int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
@@ -370,20 +395,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
regd_buf = &iser_task->rdma_regd[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
- if (aligned_len != mem->dma_nents ||
- (!ib_conn->fmr_pool && mem->dma_nents > 1)) {
- iscsi_conn->fmr_unalign_cnt++;
- iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
- aligned_len, mem->size);
- iser_data_buf_dump(mem, ibdev);
-
- /* unmap the command data before accessing it */
- iser_dma_unmap_task_data(iser_task);
-
- /* allocate copy buf, if we are writing, copy the */
- /* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
- return -ENOMEM;
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
mem = &iser_task->data_copy[cmd_dir];
}
@@ -395,7 +413,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_fmr = 0;
+ regd_buf->reg.is_mr = 0;
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
@@ -404,22 +422,159 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
(unsigned long)regd_buf->reg.va,
(unsigned long)regd_buf->reg.len);
} else { /* use FMR for multiple dma entries */
- iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
- err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+ iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
+ err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+ &regd_buf->reg);
if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev);
iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
mem->dma_nents,
ntoh24(iser_task->desc.iscsi_header.dlength));
iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
- ib_conn->page_vec->data_size, ib_conn->page_vec->length,
- ib_conn->page_vec->offset);
- for (i=0 ; i<ib_conn->page_vec->length ; i++)
+ ib_conn->fastreg.fmr.page_vec->data_size,
+ ib_conn->fastreg.fmr.page_vec->length,
+ ib_conn->fastreg.fmr.page_vec->offset);
+ for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->page_vec->pages[i]);
+ (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
}
if (err)
return err;
}
return 0;
}
+
+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+ struct iser_conn *ib_conn,
+ struct iser_regd_buf *regd_buf,
+ u32 offset, unsigned int data_size,
+ unsigned int page_list_len)
+{
+ struct ib_send_wr fastreg_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ u8 key;
+ int ret;
+
+ if (!desc->valid) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED;
+ inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(desc->data_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(desc->data_mr, ++key);
+ }
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
+ fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+ fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+ fastreg_wr.wr.fast_reg.length = data_size;
+ fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+ fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ if (!wr) {
+ wr = &fastreg_wr;
+ atomic_inc(&ib_conn->post_send_buf_count);
+ } else {
+ wr->next = &fastreg_wr;
+ atomic_add(2, &ib_conn->post_send_buf_count);
+ }
+
+ ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+ if (ret) {
+ if (bad_wr->next)
+ atomic_sub(2, &ib_conn->post_send_buf_count);
+ else
+ atomic_dec(&ib_conn->post_send_buf_count);
+ iser_err("fast registration failed, ret:%d\n", ret);
+ return ret;
+ }
+ desc->valid = false;
+
+ regd_buf->reg.mem_h = desc;
+ regd_buf->reg.lkey = desc->data_mr->lkey;
+ regd_buf->reg.rkey = desc->data_mr->rkey;
+ regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
+ regd_buf->reg.len = data_size;
+ regd_buf->reg.is_mr = 1;
+
+ return ret;
+}
+
+/**
+ * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * using Fast Registration WR (if possible) obtaining rkey and va
+ *
+ * returns 0 on success, errno code on failure
+ */
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct ib_device *ibdev = device->ib_device;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+ struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+ struct fast_reg_descriptor *desc;
+ unsigned int data_size, page_list_len;
+ int err, aligned_len;
+ unsigned long flags;
+ u32 offset;
+
+ aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
+ mem = &iser_task->data_copy[cmd_dir];
+ }
+
+ /* if there is a single dma entry, the dma mr suffices */
+ if (mem->dma_nents == 1) {
+ struct scatterlist *sg = (struct scatterlist *)mem->buf;
+
+ regd_buf->reg.lkey = device->mr->lkey;
+ regd_buf->reg.rkey = device->mr->rkey;
+ regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
+ regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
+ regd_buf->reg.is_mr = 0;
+ } else {
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+ struct fast_reg_descriptor, list);
+ list_del(&desc->list);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
+ desc->data_frpl->page_list,
+ &offset, &data_size);
+
+ if (page_list_len * SIZE_4K < data_size) {
+ iser_err("fast reg page_list too short to hold this SG\n");
+ err = -EINVAL;
+ goto err_reg;
+ }
+
+ err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
+ offset, data_size, page_list_len);
+ if (err)
+ goto err_reg;
+ }
+
+ return 0;
+err_reg:
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ return err;
+}
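Both iser_reg_rdma_mem_fmr() and iser_reg_rdma_mem_frwr() now follow the same top-level flow. A condensed sketch for orientation; the reg_*_sketch helpers are placeholders, not functions from the patch:

/* Sketch: common shape of both registration paths after this change. */
static int iser_reg_rdma_mem_sketch(struct iscsi_iser_task *task,
				    struct ib_device *ibdev,
				    enum iser_data_dir dir)
{
	struct iser_data_buf *mem = &task->data[dir];
	int aligned = iser_data_buf_aligned_len(mem, ibdev);

	if (aligned != mem->dma_nents) {
		/* SG list has gaps: copy to a bounce buffer and retry */
		if (fall_to_bounce_buf(task, ibdev, dir, aligned))
			return -ENOMEM;
		mem = &task->data_copy[dir];
	}

	if (mem->dma_nents == 1)
		return reg_single_entry_sketch(task, mem);	/* device MR */

	return reg_multi_entries_sketch(task, mem);		/* FMR or FRWR */
}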
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2c4941d0656b..afe95674008b 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -73,6 +73,36 @@ static int iser_create_device_ib_res(struct iser_device *device)
{
int i, j;
struct iser_cq_desc *cq_desc;
+ struct ib_device_attr *dev_attr;
+
+ dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
+ if (!dev_attr)
+ return -ENOMEM;
+
+ if (ib_query_device(device->ib_device, dev_attr)) {
+ pr_warn("Query device failed for %s\n", device->ib_device->name);
+ goto dev_attr_err;
+ }
+
+ /* Assign function handles - based on FMR support */
+ if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
+ device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ iser_info("FMR supported, using FMR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
+ device->iser_free_rdma_reg_res = iser_free_fmr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
+ } else
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ iser_info("FRWR supported, using FRWR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
+ device->iser_free_rdma_reg_res = iser_free_frwr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+ } else {
+ iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
+ goto dev_attr_err;
+ }
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
@@ -128,6 +158,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
+ kfree(dev_attr);
return 0;
handler_err:
@@ -147,6 +178,8 @@ pd_err:
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n");
+dev_attr_err:
+ kfree(dev_attr);
return -1;
}
@@ -178,56 +211,23 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
/**
- * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
+ * iser_create_fmr_pool - Creates FMR pool and page_vector
*
- * returns 0 on success, -1 on failure
+ * returns 0 on success, or errno code on failure
*/
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
- struct iser_device *device;
- struct ib_qp_init_attr init_attr;
- int req_err, resp_err, ret = -ENOMEM;
+ struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
- int index, min_index = 0;
-
- BUG_ON(ib_conn->device == NULL);
-
- device = ib_conn->device;
-
- ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!ib_conn->login_buf)
- goto out_err;
-
- ib_conn->login_req_buf = ib_conn->login_buf;
- ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
-
- ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- ib_conn->login_req_dma = 0;
- if (resp_err)
- ib_conn->login_resp_dma = 0;
- goto out_err;
- }
+ int ret = -ENOMEM;
- ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
- (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
- GFP_KERNEL);
- if (!ib_conn->page_vec)
- goto out_err;
+ ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
+ (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+ GFP_KERNEL);
+ if (!ib_conn->fastreg.fmr.page_vec)
+ return ret;
- ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
+ ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
params.page_shift = SHIFT_4K;
/* when the first/last SG element are not start/end *
@@ -235,24 +235,143 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
/* make the pool size twice the max number of SCSI commands *
* the ML is expected to queue, watermark for unmap at 50% */
- params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
- params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
+ params.pool_size = cmds_max * 2;
+ params.dirty_watermark = cmds_max;
params.cache = 0;
params.flush_function = NULL;
params.access = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
- ret = PTR_ERR(ib_conn->fmr_pool);
- if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) {
- ib_conn->fmr_pool = NULL;
- goto out_err;
- } else if (ret == -ENOSYS) {
- ib_conn->fmr_pool = NULL;
+ ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params);
+ if (!IS_ERR(ib_conn->fastreg.fmr.pool))
+ return 0;
+
+ /* no FMR => no need for page_vec */
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
+
+ ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
+ ib_conn->fastreg.fmr.pool = NULL;
+ if (ret != -ENOSYS) {
+ iser_err("FMR allocation failed, err %d\n", ret);
+ return ret;
+ } else {
iser_warn("FMRs are not supported, using unaligned mode\n");
- ret = 0;
+ return 0;
}
+}
+
+/**
+ * iser_free_fmr_pool - releases the FMR pool and page vec
+ */
+void iser_free_fmr_pool(struct iser_conn *ib_conn)
+{
+ iser_info("freeing conn %p fmr pool %p\n",
+ ib_conn, ib_conn->fastreg.fmr.pool);
+
+ if (ib_conn->fastreg.fmr.pool != NULL)
+ ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
+
+ ib_conn->fastreg.fmr.pool = NULL;
+
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
+}
+
+/**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+ struct iser_device *device = ib_conn->device;
+ struct fast_reg_descriptor *desc;
+ int i, ret;
+
+ INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+ ib_conn->fastreg.frwr.pool_size = 0;
+ for (i = 0; i < cmds_max; i++) {
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ iser_err("Failed to allocate a new fast_reg descriptor\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_frpl)) {
+ ret = PTR_ERR(desc->data_frpl);
+ iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret);
+ goto fast_reg_page_failure;
+ }
+
+ desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_mr)) {
+ ret = PTR_ERR(desc->data_mr);
+ iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+ goto fast_reg_mr_failure;
+ }
+ desc->valid = true;
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ ib_conn->fastreg.frwr.pool_size++;
+ }
+
+ return 0;
+
+fast_reg_mr_failure:
+ ib_free_fast_reg_page_list(desc->data_frpl);
+fast_reg_page_failure:
+ kfree(desc);
+err:
+ iser_free_frwr_pool(ib_conn);
+ return ret;
+}
+
+/**
+ * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ */
+void iser_free_frwr_pool(struct iser_conn *ib_conn)
+{
+ struct fast_reg_descriptor *desc, *tmp;
+ int i = 0;
+
+ if (list_empty(&ib_conn->fastreg.frwr.pool))
+ return;
+
+ iser_info("freeing conn %p frwr pool\n", ib_conn);
+
+ list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) {
+ list_del(&desc->list);
+ ib_free_fast_reg_page_list(desc->data_frpl);
+ ib_dereg_mr(desc->data_mr);
+ kfree(desc);
+ ++i;
+ }
+
+ if (i < ib_conn->fastreg.frwr.pool_size)
+ iser_warn("pool still has %d regions registered\n",
+ ib_conn->fastreg.frwr.pool_size - i);
+}
+
+/**
+ * iser_create_ib_conn_res - Creates the Queue-Pair (QP)
+ *
+ * returns 0 on success, -1 on failure
+ */
+static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ struct ib_qp_init_attr init_attr;
+ int ret = -ENOMEM;
+ int index, min_index = 0;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
memset(&init_attr, 0, sizeof init_attr);
@@ -282,9 +401,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
goto out_err;
ib_conn->qp = ib_conn->cma_id->qp;
- iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
+ iser_info("setting conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->cma_id->qp);
+ ib_conn->cma_id->qp);
return ret;
out_err:
@@ -293,7 +412,7 @@ out_err:
}
/**
- * releases the FMR pool and QP objects, returns 0 on success,
+ * releases the QP objects, returns 0 on success,
* -1 on failure
*/
static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
@@ -301,13 +420,11 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
int cq_index;
BUG_ON(ib_conn == NULL);
- iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n",
+ iser_info("freeing conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->qp);
+ ib_conn->qp);
/* qp is created only once both addr & route are resolved */
- if (ib_conn->fmr_pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fmr_pool);
if (ib_conn->qp != NULL) {
cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
@@ -316,21 +433,7 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
rdma_destroy_qp(ib_conn->cma_id);
}
- ib_conn->fmr_pool = NULL;
ib_conn->qp = NULL;
- kfree(ib_conn->page_vec);
-
- if (ib_conn->login_buf) {
- if (ib_conn->login_req_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (ib_conn->login_resp_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->login_buf);
- }
return 0;
}
@@ -694,7 +797,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
page_list = page_vec->pages;
io_addr = page_list[0];
- mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
+ mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
page_list,
page_vec->length,
io_addr);
@@ -709,7 +812,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
- mem_reg->is_fmr = 1;
+ mem_reg->is_mr = 1;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
@@ -727,12 +830,18 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
}
/**
- * Unregister (previosuly registered) memory.
+ * Unregister (previously registered using FMR) memory.
+ * If the memory was not FMR-registered, this does nothing.
*/
-void iser_unreg_mem(struct iser_mem_reg *reg)
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
+ if (!reg->is_mr)
+ return;
+
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
@@ -742,6 +851,23 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
reg->mem_h = NULL;
}
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct fast_reg_descriptor *desc = reg->mem_h;
+
+ if (!reg->is_mr)
+ return;
+
+ reg->mem_h = NULL;
+ reg->is_mr = 0;
+ spin_lock_bh(&ib_conn->lock);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_bh(&ib_conn->lock);
+}
+
int iser_post_recvl(struct iser_conn *ib_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -779,7 +905,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
- my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+ my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
}
rx_wr--;
@@ -863,7 +989,11 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
if (wc.status == IB_WC_SUCCESS) {
if (wc.opcode == IB_WC_SEND)
iser_snd_completion(tx_desc, ib_conn);
- else
+ else if (wc.opcode == IB_WC_LOCAL_INV ||
+ wc.opcode == IB_WC_FAST_REG_MR) {
+ atomic_dec(&ib_conn->post_send_buf_count);
+ continue;
+ } else
iser_err("expected opcode %d got %d\n",
IB_WC_SEND, wc.opcode);
} else {
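For reference, the send-queue accounting added in this file pairs up as follows (a summary drawn from the hunks above, not new code):

/*
 * post_send_buf_count bookkeeping for fast registration:
 *
 *   iser_fast_reg_mr():   FAST_REG_MR alone              -> count += 1
 *                         LOCAL_INV chained before it    -> count += 2
 *   iser_drain_tx_cq():   each IB_WC_LOCAL_INV or
 *                         IB_WC_FAST_REG_MR completion   -> count -= 1
 *
 * so every signaled registration WR is matched by exactly one decrement
 * when its completion is drained.
 */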
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 2aafb809e067..dfd1e36f5753 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -576,6 +576,7 @@ struct adapter {
struct l2t_data *l2t;
void *uld_handle[CXGB4_ULD_MAX];
struct list_head list_node;
+ struct list_head rcu_node;
struct tid_info tids;
void **tid_release_head;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5a3256b083f2..038df4b96139 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -60,6 +60,7 @@
#include <linux/workqueue.h>
#include <net/neighbour.h>
#include <net/netevent.h>
+#include <net/addrconf.h>
#include <asm/uaccess.h>
#include "cxgb4.h"
@@ -68,6 +69,11 @@
#include "t4fw_api.h"
#include "l2t.h"
+#include <../drivers/net/bonding/bonding.h>
+
+#ifdef DRV_VERSION
+#undef DRV_VERSION
+#endif
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5 Network Driver"
@@ -400,6 +406,9 @@ static struct dentry *cxgb4_debugfs_root;
static LIST_HEAD(adapter_list);
static DEFINE_MUTEX(uld_mutex);
+/* Adapter list to be accessed from atomic context */
+static LIST_HEAD(adap_rcu_list);
+static DEFINE_SPINLOCK(adap_rcu_lock);
static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
static const char *uld_str[] = { "RDMA", "iSCSI" };
@@ -3227,6 +3236,38 @@ static int tid_init(struct tid_info *t)
return 0;
}
+static int cxgb4_clip_get(const struct net_device *dev,
+ const struct in6_addr *lip)
+{
+ struct adapter *adap;
+ struct fw_clip_cmd c;
+
+ adap = netdev2adap(dev);
+ memset(&c, 0, sizeof(c));
+ c.op_to_write = htonl(FW_CMD_OP(FW_CLIP_CMD) |
+ FW_CMD_REQUEST | FW_CMD_WRITE);
+ c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
+ *(__be64 *)&c.ip_hi = *(__be64 *)(lip->s6_addr);
+ *(__be64 *)&c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
+ return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
+}
+
+static int cxgb4_clip_release(const struct net_device *dev,
+ const struct in6_addr *lip)
+{
+ struct adapter *adap;
+ struct fw_clip_cmd c;
+
+ adap = netdev2adap(dev);
+ memset(&c, 0, sizeof(c));
+ c.op_to_write = htonl(FW_CMD_OP(FW_CLIP_CMD) |
+ FW_CMD_REQUEST | FW_CMD_READ);
+ c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
+ *(__be64 *)&c.ip_hi = *(__be64 *)(lip->s6_addr);
+ *(__be64 *)&c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
+ return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
+}
+
/**
* cxgb4_create_server - create an IP server
* @dev: the device
@@ -3246,6 +3287,7 @@ int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
struct sk_buff *skb;
struct adapter *adap;
struct cpl_pass_open_req *req;
+ int ret;
skb = alloc_skb(sizeof(*req), GFP_KERNEL);
if (!skb)
@@ -3263,10 +3305,78 @@ int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
req->opt0 = cpu_to_be64(TX_CHAN(chan));
req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
- return t4_mgmt_tx(adap, skb);
+ ret = t4_mgmt_tx(adap, skb);
+ return net_xmit_eval(ret);
}
EXPORT_SYMBOL(cxgb4_create_server);
+/* cxgb4_create_server6 - create an IPv6 server
+ * @dev: the device
+ * @stid: the server TID
+ * @sip: local IPv6 address to bind server to
+ * @sport: the server's TCP port
+ * @queue: queue to direct messages from this server to
+ *
+ * Create an IPv6 server for the given port and address.
+ * Returns <0 on error and one of the %NET_XMIT_* values on success.
+ */
+int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
+ const struct in6_addr *sip, __be16 sport,
+ unsigned int queue)
+{
+ unsigned int chan;
+ struct sk_buff *skb;
+ struct adapter *adap;
+ struct cpl_pass_open_req6 *req;
+ int ret;
+
+ skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ adap = netdev2adap(dev);
+ req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
+ req->local_port = sport;
+ req->peer_port = htons(0);
+ req->local_ip_hi = *(__be64 *)(sip->s6_addr);
+ req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
+ req->peer_ip_hi = cpu_to_be64(0);
+ req->peer_ip_lo = cpu_to_be64(0);
+ chan = rxq_to_chan(&adap->sge, queue);
+ req->opt0 = cpu_to_be64(TX_CHAN(chan));
+ req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
+ SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
+ ret = t4_mgmt_tx(adap, skb);
+ return net_xmit_eval(ret);
+}
+EXPORT_SYMBOL(cxgb4_create_server6);
+
+int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
+ unsigned int queue, bool ipv6)
+{
+ struct sk_buff *skb;
+ struct adapter *adap;
+ struct cpl_close_listsvr_req *req;
+ int ret;
+
+ adap = netdev2adap(dev);
+
+ skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ req = (struct cpl_close_listsvr_req *)__skb_put(skb, sizeof(*req));
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
+ req->reply_ctrl = htons(NO_REPLY(0) | (ipv6 ? LISTSVR_IPV6(1) :
+ LISTSVR_IPV6(0)) | QUEUENO(queue));
+ ret = t4_mgmt_tx(adap, skb);
+ return net_xmit_eval(ret);
+}
+EXPORT_SYMBOL(cxgb4_remove_server);
+
/**
* cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
* @mtus: the HW MTU table
@@ -3721,6 +3831,10 @@ static void attach_ulds(struct adapter *adap)
{
unsigned int i;
+ spin_lock(&adap_rcu_lock);
+ list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
+ spin_unlock(&adap_rcu_lock);
+
mutex_lock(&uld_mutex);
list_add_tail(&adap->list_node, &adapter_list);
for (i = 0; i < CXGB4_ULD_MAX; i++)
@@ -3746,6 +3860,10 @@ static void detach_ulds(struct adapter *adap)
netevent_registered = false;
}
mutex_unlock(&uld_mutex);
+
+ spin_lock(&adap_rcu_lock);
+ list_del_rcu(&adap->rcu_node);
+ spin_unlock(&adap_rcu_lock);
}
static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
@@ -3809,6 +3927,169 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
}
EXPORT_SYMBOL(cxgb4_unregister_uld);
+/* Check whether the netdev on which the event occurred belongs to us.
+ * Return success (1) if it does, failure (0) otherwise.
+ */
+static int cxgb4_netdev(struct net_device *netdev)
+{
+ struct adapter *adap;
+ int i;
+
+ spin_lock(&adap_rcu_lock);
+ list_for_each_entry_rcu(adap, &adap_rcu_list, rcu_node)
+ for (i = 0; i < MAX_NPORTS; i++)
+ if (adap->port[i] == netdev) {
+ spin_unlock(&adap_rcu_lock);
+ return 1;
+ }
+ spin_unlock(&adap_rcu_lock);
+ return 0;
+}
+
+static int clip_add(struct net_device *event_dev, struct inet6_ifaddr *ifa,
+ unsigned long event)
+{
+ int ret = NOTIFY_DONE;
+
+ rcu_read_lock();
+ if (cxgb4_netdev(event_dev)) {
+ switch (event) {
+ case NETDEV_UP:
+ ret = cxgb4_clip_get(event_dev,
+ (const struct in6_addr *)ifa->addr.s6_addr);
+ if (ret < 0) {
+ rcu_read_unlock();
+ return ret;
+ }
+ ret = NOTIFY_OK;
+ break;
+ case NETDEV_DOWN:
+ cxgb4_clip_release(event_dev,
+ (const struct in6_addr *)ifa->addr.s6_addr);
+ ret = NOTIFY_OK;
+ break;
+ default:
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static int cxgb4_inet6addr_handler(struct notifier_block *this,
+ unsigned long event, void *data)
+{
+ struct inet6_ifaddr *ifa = data;
+ struct net_device *event_dev;
+ int ret = NOTIFY_DONE;
+ int cnt;
+ struct bonding *bond = netdev_priv(ifa->idev->dev);
+ struct slave *slave;
+ struct pci_dev *first_pdev = NULL;
+
+ if (ifa->idev->dev->priv_flags & IFF_802_1Q_VLAN) {
+ event_dev = vlan_dev_real_dev(ifa->idev->dev);
+ ret = clip_add(event_dev, ifa, event);
+ } else if (ifa->idev->dev->flags & IFF_MASTER) {
+ /* Two different adapters may be enslaved in the same bond.
+ * Find each such adapter and add the CLIP entry on it exactly
+ * once.
+ */
+ read_lock(&bond->lock);
+ bond_for_each_slave(bond, slave, cnt) {
+ if (!first_pdev) {
+ ret = clip_add(slave->dev, ifa, event);
+ /* Initialize first_pdev only if clip_add()
+ * succeeded, i.e. the slave is one of our devices
+ */
+ if (ret == NOTIFY_OK)
+ first_pdev = to_pci_dev(
+ slave->dev->dev.parent);
+ } else if (first_pdev !=
+ to_pci_dev(slave->dev->dev.parent))
+ ret = clip_add(slave->dev, ifa, event);
+ }
+ read_unlock(&bond->lock);
+ } else
+ ret = clip_add(ifa->idev->dev, ifa, event);
+
+ return ret;
+}
+
+static struct notifier_block cxgb4_inet6addr_notifier = {
+ .notifier_call = cxgb4_inet6addr_handler
+};
+
+/* Retrieves IPv6 addresses from a root device (bond, vlan) associated with
+ * a physical device.
+ * The physical device reference is needed to send the actual CLIP command.
+ */
+static int update_dev_clip(struct net_device *root_dev, struct net_device *dev)
+{
+ struct inet6_dev *idev = NULL;
+ struct inet6_ifaddr *ifa;
+ int ret = 0;
+
+ idev = __in6_dev_get(root_dev);
+ if (!idev)
+ return ret;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = cxgb4_clip_get(dev,
+ (const struct in6_addr *)ifa->addr.s6_addr);
+ if (ret < 0)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+
+ return ret;
+}
+
+static int update_root_dev_clip(struct net_device *dev)
+{
+ struct net_device *root_dev = NULL;
+ int i, ret = 0;
+
+ /* First populate the real net device's IPv6 addresses */
+ ret = update_dev_clip(dev, dev);
+ if (ret)
+ return ret;
+
+ /* Parse all bond and vlan devices layered on top of the physical dev */
+ for (i = 0; i < VLAN_N_VID; i++) {
+ root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i);
+ if (!root_dev)
+ continue;
+
+ ret = update_dev_clip(root_dev, dev);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static void update_clip(const struct adapter *adap)
+{
+ int i;
+ struct net_device *dev;
+ int ret;
+
+ rcu_read_lock();
+
+ for (i = 0; i < MAX_NPORTS; i++) {
+ dev = adap->port[i];
+ ret = 0;
+
+ if (dev)
+ ret = update_root_dev_clip(dev);
+
+ if (ret < 0)
+ break;
+ }
+ rcu_read_unlock();
+}
+
/**
* cxgb_up - enable the adapter
* @adap: adapter being enabled
@@ -3854,6 +4135,7 @@ static int cxgb_up(struct adapter *adap)
t4_intr_enable(adap);
adap->flags |= FULL_INIT_DONE;
notify_ulds(adap, CXGB4_STATE_UP);
+ update_clip(adap);
out:
return err;
irq_err:
@@ -5870,11 +6152,15 @@ static int __init cxgb4_init_module(void)
ret = pci_register_driver(&cxgb4_driver);
if (ret < 0)
debugfs_remove(cxgb4_debugfs_root);
+
+ register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+
return ret;
}
static void __exit cxgb4_cleanup_module(void)
{
+ unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
pci_unregister_driver(&cxgb4_driver);
debugfs_remove(cxgb4_debugfs_root); /* NULL ok */
flush_workqueue(workq);
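Taken together, the notifier plumbing above gives the following event path for IPv6 addresses (a summary of the hunks in this file, not additional code):

/*
 * register_inet6addr_notifier(&cxgb4_inet6addr_notifier)
 *   -> cxgb4_inet6addr_handler()      (plain, VLAN or bonded netdev)
 *     -> clip_add()                   (only if cxgb4_netdev() matches)
 *       -> cxgb4_clip_get()           on NETDEV_UP
 *       -> cxgb4_clip_release()       on NETDEV_DOWN
 *
 * cxgb_up() additionally seeds existing addresses through update_clip().
 */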
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 4faf4d067ee7..6f21f2451c30 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -154,6 +154,11 @@ struct in6_addr;
int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
__be32 sip, __be16 sport, __be16 vlan,
unsigned int queue);
+int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
+ const struct in6_addr *sip, __be16 sport,
+ unsigned int queue);
+int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
+ unsigned int queue, bool ipv6);
int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
__be32 sip, __be16 sport, __be16 vlan,
unsigned int queue,
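A hypothetical ULD-side sketch of the two newly exported entry points, just to show the calling convention; the stid, queue and error handling here are illustrative, not taken from any in-tree caller:

/* Hypothetical caller: bring up and later tear down an IPv6 listener. */
static int uld_listen_ipv6_sketch(struct net_device *dev, unsigned int stid,
				  const struct in6_addr *sip, __be16 sport,
				  unsigned int rxq)
{
	int ret;

	ret = cxgb4_create_server6(dev, stid, sip, sport, rxq);
	if (ret < 0)		/* <0 on error, NET_XMIT_* value otherwise */
		return ret;

	/* ... serve connections ... */

	return cxgb4_remove_server(dev, stid, rxq, true /* ipv6 */);
}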
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 01d484441200..cd6874b571ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -320,6 +320,21 @@ struct cpl_act_open_req6 {
__be32 opt2;
};
+struct cpl_t5_act_open_req6 {
+ WR_HDR;
+ union opcode_tid ot;
+ __be16 local_port;
+ __be16 peer_port;
+ __be64 local_ip_hi;
+ __be64 local_ip_lo;
+ __be64 peer_ip_hi;
+ __be64 peer_ip_lo;
+ __be64 opt0;
+ __be32 rsvd;
+ __be32 opt2;
+ __be64 params;
+};
+
struct cpl_act_open_rpl {
union opcode_tid ot;
__be32 atid_status;
@@ -405,7 +420,7 @@ struct cpl_close_listsvr_req {
WR_HDR;
union opcode_tid ot;
__be16 reply_ctrl;
-#define LISTSVR_IPV6 (1 << 14)
+#define LISTSVR_IPV6(x) ((x) << 14)
__be16 rsvd;
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index d1c755f78aaf..6f77ac487743 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -616,6 +616,7 @@ enum fw_cmd_opcodes {
FW_RSS_IND_TBL_CMD = 0x20,
FW_RSS_GLB_CONFIG_CMD = 0x22,
FW_RSS_VI_CONFIG_CMD = 0x23,
+ FW_CLIP_CMD = 0x28,
FW_LASTC2E_CMD = 0x40,
FW_ERROR_CMD = 0x80,
FW_DEBUG_CMD = 0x81,
@@ -2062,6 +2063,28 @@ struct fw_rss_vi_config_cmd {
} u;
};
+struct fw_clip_cmd {
+ __be32 op_to_write;
+ __be32 alloc_to_len16;
+ __be64 ip_hi;
+ __be64 ip_lo;
+ __be32 r4[2];
+};
+
+#define S_FW_CLIP_CMD_ALLOC 31
+#define M_FW_CLIP_CMD_ALLOC 0x1
+#define V_FW_CLIP_CMD_ALLOC(x) ((x) << S_FW_CLIP_CMD_ALLOC)
+#define G_FW_CLIP_CMD_ALLOC(x) \
+ (((x) >> S_FW_CLIP_CMD_ALLOC) & M_FW_CLIP_CMD_ALLOC)
+#define F_FW_CLIP_CMD_ALLOC V_FW_CLIP_CMD_ALLOC(1U)
+
+#define S_FW_CLIP_CMD_FREE 30
+#define M_FW_CLIP_CMD_FREE 0x1
+#define V_FW_CLIP_CMD_FREE(x) ((x) << S_FW_CLIP_CMD_FREE)
+#define G_FW_CLIP_CMD_FREE(x) \
+ (((x) >> S_FW_CLIP_CMD_FREE) & M_FW_CLIP_CMD_FREE)
+#define F_FW_CLIP_CMD_FREE V_FW_CLIP_CMD_FREE(1U)
+
enum fw_error_type {
FW_ERROR_TYPE_EXCEPTION = 0x0,
FW_ERROR_TYPE_HWMODULE = 0x1,
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index f984a89c27df..dd6876321116 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1909,7 +1909,8 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc)
int log_rq_stride = qpc->rq_size_stride & 7;
int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1;
int rss = (be32_to_cpu(qpc->flags) >> 13) & 1;
- int xrc = (be32_to_cpu(qpc->local_qpn) >> 23) & 1;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0;
int sq_size;
int rq_size;
int total_pages;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 52c23a892bab..d73423c37c25 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1052,11 +1052,6 @@ struct _rule_hw {
};
};
-/* translating DMFS verbs sniffer rule to the FW API would need two reg IDs */
-struct mlx4_flow_handle {
- u64 reg_id[2];
-};
-
int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
enum mlx4_net_trans_promisc_mode mode);
int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 645c3cedce9c..e393171e2fac 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -116,7 +116,8 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
- IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24)
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29)
};
enum ib_atomic_cap {
@@ -635,6 +636,12 @@ enum ib_qp_create_flags {
IB_QP_CREATE_RESERVED_END = 1 << 31,
};
+
+/*
+ * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
+ * callback to destroy the passed in QP.
+ */
+
struct ib_qp_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
@@ -953,6 +960,7 @@ struct ib_ucontext {
struct list_head srq_list;
struct list_head ah_list;
struct list_head xrcd_list;
+ struct list_head rule_list;
int closing;
};
@@ -1033,7 +1041,8 @@ struct ib_qp {
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct list_head xrcd_list;
- atomic_t usecnt; /* count times opened, mcast attaches */
+ /* count times opened, mcast attaches, flow attaches */
+ atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
struct ib_uobject *uobject;
@@ -1068,6 +1077,112 @@ struct ib_fmr {
u32 rkey;
};
+/* Supported steering options */
+enum ib_flow_attr_type {
+ /* steering according to rule specifications */
+ IB_FLOW_ATTR_NORMAL = 0x0,
+ /* default unicast and multicast rule -
+ * receive all Eth traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_ALL_DEFAULT = 0x1,
+ /* default multicast rule -
+ * receive all Eth multicast traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_MC_DEFAULT = 0x2,
+ /* sniffer rule - receive all port traffic */
+ IB_FLOW_ATTR_SNIFFER = 0x3
+};
+
+/* Supported steering header types */
+enum ib_flow_spec_type {
+ /* L2 headers*/
+ IB_FLOW_SPEC_ETH = 0x20,
+ /* L3 header*/
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ /* L4 headers*/
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41
+};
+
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+
+/* Flow steering rule priority is set according to it's domain.
+ * Lower domain value means higher priority.
+ */
+enum ib_flow_domain {
+ IB_FLOW_DOMAIN_USER,
+ IB_FLOW_DOMAIN_ETHTOOL,
+ IB_FLOW_DOMAIN_RFS,
+ IB_FLOW_DOMAIN_NIC,
+ IB_FLOW_DOMAIN_NUM /* Must be last */
+};
+
+struct ib_flow_eth_filter {
+ u8 dst_mac[6];
+ u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_flow_spec_eth {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_eth_filter val;
+ struct ib_flow_eth_filter mask;
+};
+
+struct ib_flow_ipv4_filter {
+ __be32 src_ip;
+ __be32 dst_ip;
+};
+
+struct ib_flow_spec_ipv4 {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ipv4_filter val;
+ struct ib_flow_ipv4_filter mask;
+};
+
+struct ib_flow_tcp_udp_filter {
+ __be16 dst_port;
+ __be16 src_port;
+};
+
+struct ib_flow_spec_tcp_udp {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_tcp_udp_filter val;
+ struct ib_flow_tcp_udp_filter mask;
+};
+
+union ib_flow_spec {
+ struct {
+ enum ib_flow_spec_type type;
+ u16 size;
+ };
+ struct ib_flow_spec_eth eth;
+ struct ib_flow_spec_ipv4 ipv4;
+ struct ib_flow_spec_tcp_udp tcp_udp;
+};
+
+struct ib_flow_attr {
+ enum ib_flow_attr_type type;
+ u16 size;
+ u16 priority;
+ u32 flags;
+ u8 num_of_specs;
+ u8 port;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_flow {
+ struct ib_qp *qp;
+ struct ib_uobject *uobject;
+};
+
struct ib_mad;
struct ib_grh;
@@ -1300,6 +1415,11 @@ struct ib_device {
struct ib_ucontext *ucontext,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ struct ib_flow * (*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr
+ *flow_attr,
+ int domain);
+ int (*destroy_flow)(struct ib_flow *flow_id);
struct ib_dma_mapping_ops *dma_ops;
@@ -2260,4 +2380,8 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr, int domain);
+int ib_destroy_flow(struct ib_flow *flow_id);
+
#endif /* IB_VERBS_H */
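A minimal consumer-side sketch of the new flow steering verbs: attach an L2 steering rule to a QP and detach it again. The MAC, port number and the combined attr+spec layout follow the "optional layers" comment above; it assumes ib_create_flow() returns an ERR_PTR-encoded pointer on failure, and error handling is abbreviated:

/* Sketch: steer traffic for one destination MAC to a QP, then detach. */
static int attach_eth_flow_sketch(struct ib_qp *qp, const u8 *mac)
{
	struct {
		struct ib_flow_attr	attr;
		struct ib_flow_spec_eth	eth;
	} rule = {
		.attr = {
			.type		= IB_FLOW_ATTR_NORMAL,
			.size		= sizeof(rule),
			.num_of_specs	= 1,
			.port		= 1,
		},
		.eth = {
			.type	= IB_FLOW_SPEC_ETH,
			.size	= sizeof(rule.eth),
		},
	};
	struct ib_flow *flow;

	memcpy(rule.eth.val.dst_mac, mac, 6);
	memset(rule.eth.mask.dst_mac, 0xff, 6);

	flow = ib_create_flow(qp, &rule.attr, IB_FLOW_DOMAIN_USER);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	/* ... traffic matching dst_mac now arrives on qp ... */

	return ib_destroy_flow(flow);
}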
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 1a046b1595cc..1017e0bdf8ba 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -49,8 +49,8 @@ enum iw_cm_event_type {
struct iw_cm_event {
enum iw_cm_event_type event;
int status;
- struct sockaddr_in local_addr;
- struct sockaddr_in remote_addr;
+ struct sockaddr_storage local_addr;
+ struct sockaddr_storage remote_addr;
void *private_data;
void *provider_data;
u8 private_data_len;
@@ -83,8 +83,8 @@ struct iw_cm_id {
iw_cm_handler cm_handler; /* client callback function */
void *context; /* client cb context */
struct ib_device *device;
- struct sockaddr_in local_addr;
- struct sockaddr_in remote_addr;
+ struct sockaddr_storage local_addr;
+ struct sockaddr_storage remote_addr;
void *provider_data; /* provider private data */
iw_event_handler event_handler; /* cb for provider
events */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 805711ea2005..0b233c56b0e4 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -43,6 +43,7 @@
* compatibility are made.
*/
#define IB_USER_VERBS_ABI_VERSION 6
+#define IB_USER_VERBS_CMD_THRESHOLD 50
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +86,9 @@ enum {
IB_USER_VERBS_CMD_OPEN_XRCD,
IB_USER_VERBS_CMD_CLOSE_XRCD,
IB_USER_VERBS_CMD_CREATE_XSRQ,
- IB_USER_VERBS_CMD_OPEN_QP
+ IB_USER_VERBS_CMD_OPEN_QP,
+ IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+ IB_USER_VERBS_CMD_DESTROY_FLOW
};
/*
@@ -123,6 +126,15 @@ struct ib_uverbs_cmd_hdr {
__u16 out_words;
};
+struct ib_uverbs_cmd_hdr_ex {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u16 provider_in_words;
+ __u16 provider_out_words;
+ __u32 cmd_hdr_reserved;
+};
+
struct ib_uverbs_get_context {
__u64 response;
__u64 driver_data[0];
@@ -684,6 +696,91 @@ struct ib_uverbs_detach_mcast {
__u64 driver_data[0];
};
+struct ib_kern_eth_filter {
+ __u8 dst_mac[6];
+ __u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_kern_spec_eth {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ struct ib_kern_eth_filter val;
+ struct ib_kern_eth_filter mask;
+};
+
+struct ib_kern_ipv4_filter {
+ __be32 src_ip;
+ __be32 dst_ip;
+};
+
+struct ib_kern_spec_ipv4 {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ struct ib_kern_ipv4_filter val;
+ struct ib_kern_ipv4_filter mask;
+};
+
+struct ib_kern_tcp_udp_filter {
+ __be16 dst_port;
+ __be16 src_port;
+};
+
+struct ib_kern_spec_tcp_udp {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ struct ib_kern_tcp_udp_filter val;
+ struct ib_kern_tcp_udp_filter mask;
+};
+
+struct ib_kern_spec {
+ union {
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ struct ib_kern_spec_eth eth;
+ struct ib_kern_spec_ipv4 ipv4;
+ struct ib_kern_spec_tcp_udp tcp_udp;
+ };
+};
+
+struct ib_kern_flow_attr {
+ __u32 type;
+ __u16 size;
+ __u16 priority;
+ __u8 num_of_specs;
+ __u8 reserved[2];
+ __u8 port;
+ __u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_uverbs_create_flow {
+ __u32 comp_mask;
+ __u64 response;
+ __u32 qp_handle;
+ struct ib_kern_flow_attr flow_attr;
+};
+
+struct ib_uverbs_create_flow_resp {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
+struct ib_uverbs_destroy_flow {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
struct ib_uverbs_create_srq {
__u64 response;
__u64 user_handle;