Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--   drivers/infiniband/core/cm.c            78
-rw-r--r--   drivers/infiniband/core/cm_trace.h       2
-rw-r--r--   drivers/infiniband/core/cma.c           25
-rw-r--r--   drivers/infiniband/core/cma_trace.h      2
-rw-r--r--   drivers/infiniband/core/iwcm.c          29
-rw-r--r--   drivers/infiniband/core/mad_rmpp.c       2
-rw-r--r--   drivers/infiniband/core/umem_odp.c     271
-rw-r--r--   drivers/infiniband/core/uverbs_cmd.c     2
-rw-r--r--   drivers/infiniband/core/verbs.c          2
9 files changed, 152 insertions, 261 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 142170473e75..8670e58675c6 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -36,6 +36,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
+#define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */
static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_NO_QP] = "no QP",
@@ -167,7 +168,7 @@ struct cm_port {
struct cm_device {
struct kref kref;
struct list_head list;
- spinlock_t mad_agent_lock;
+ rwlock_t mad_agent_lock;
struct ib_device *ib_device;
u8 ack_delay;
int going_down;
@@ -241,7 +242,6 @@ struct cm_id_private {
u8 initiator_depth;
u8 retry_count;
u8 rnr_retry_count;
- u8 service_timeout;
u8 target_ack_delay;
struct list_head work_list;
@@ -285,7 +285,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
if (!cm_id_priv->av.port)
return ERR_PTR(-EINVAL);
- spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
mad_agent = cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
m = ERR_PTR(-EINVAL);
@@ -311,7 +311,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
m->ah = ah;
out:
- spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return m;
}
@@ -1297,10 +1297,10 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
if (!cm_id_priv->av.port)
return cpu_to_be64(low_tid);
- spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
if (cm_id_priv->av.port->mad_agent)
hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return cpu_to_be64(hi_tid | low_tid);
}
@@ -1872,7 +1872,7 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
static void cm_format_mra(struct cm_mra_msg *mra_msg,
struct cm_id_private *cm_id_priv,
- enum cm_msg_response msg_mraed, u8 service_timeout,
+ enum cm_msg_response msg_mraed,
const void *private_data, u8 private_data_len)
{
cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
@@ -1881,7 +1881,7 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg,
be32_to_cpu(cm_id_priv->id.local_id));
IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
be32_to_cpu(cm_id_priv->id.remote_id));
- IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
+ IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, CM_MRA_SETTING);
if (private_data && private_data_len)
IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
@@ -1960,7 +1960,7 @@ static void cm_dup_req_handler(struct cm_work *work,
switch (cm_id_priv->id.state) {
case IB_CM_MRA_REQ_SENT:
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
+ CM_MSG_RESPONSE_REQ,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
break;
@@ -2454,7 +2454,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
cm_id_priv->private_data_len);
else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
+ CM_MSG_RESPONSE_REP,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
else
@@ -3094,26 +3094,13 @@ out:
return -EINVAL;
}
-int ib_send_cm_mra(struct ib_cm_id *cm_id,
- u8 service_timeout,
- const void *private_data,
- u8 private_data_len)
+int ib_prepare_cm_mra(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
enum ib_cm_state cm_state;
enum ib_cm_lap_state lap_state;
- enum cm_msg_response msg_response;
- void *data;
unsigned long flags;
- int ret;
-
- if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
- return -EINVAL;
-
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3122,58 +3109,33 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
case IB_CM_REQ_RCVD:
cm_state = IB_CM_MRA_REQ_SENT;
lap_state = cm_id->lap_state;
- msg_response = CM_MSG_RESPONSE_REQ;
break;
case IB_CM_REP_RCVD:
cm_state = IB_CM_MRA_REP_SENT;
lap_state = cm_id->lap_state;
- msg_response = CM_MSG_RESPONSE_REP;
break;
case IB_CM_ESTABLISHED:
if (cm_id->lap_state == IB_CM_LAP_RCVD) {
cm_state = cm_id->state;
lap_state = IB_CM_MRA_LAP_SENT;
- msg_response = CM_MSG_RESPONSE_OTHER;
break;
}
fallthrough;
default:
- trace_icm_send_mra_unknown_err(&cm_id_priv->id);
+ trace_icm_prepare_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
goto error_unlock;
}
- if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
- msg = cm_alloc_msg(cm_id_priv);
- if (IS_ERR(msg)) {
- ret = PTR_ERR(msg);
- goto error_unlock;
- }
-
- cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- msg_response, service_timeout,
- private_data, private_data_len);
- trace_icm_send_mra(cm_id);
- ret = ib_post_send_mad(msg, NULL);
- if (ret)
- goto error_free_msg;
- }
-
cm_id->state = cm_state;
cm_id->lap_state = lap_state;
- cm_id_priv->service_timeout = service_timeout;
- cm_set_private_data(cm_id_priv, data, private_data_len);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return 0;
+ cm_set_private_data(cm_id_priv, NULL, 0);
-error_free_msg:
- cm_free_msg(msg);
error_unlock:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
return ret;
}
-EXPORT_SYMBOL(ib_send_cm_mra);
+EXPORT_SYMBOL(ib_prepare_cm_mra);
static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
{
@@ -3377,7 +3339,6 @@ static int cm_lap_handler(struct cm_work *work)
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_OTHER,
- cm_id_priv->service_timeout,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
spin_unlock_irq(&cm_id_priv->lock);
@@ -3786,7 +3747,8 @@ static void cm_process_send_error(struct cm_id_private *cm_id_priv,
spin_lock_irq(&cm_id_priv->lock);
if (msg != cm_id_priv->msg) {
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_priv_msg(msg);
+ cm_free_msg(msg);
+ cm_deref_id(cm_id_priv);
return;
}
cm_free_priv_msg(msg);
@@ -4378,7 +4340,7 @@ static int cm_add_one(struct ib_device *ib_device)
return -ENOMEM;
kref_init(&cm_dev->kref);
- spin_lock_init(&cm_dev->mad_agent_lock);
+ rwlock_init(&cm_dev->mad_agent_lock);
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
@@ -4494,9 +4456,9 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
* The above ensures no call paths from the work are running,
* the remaining paths all take the mad_agent_lock.
*/
- spin_lock(&cm_dev->mad_agent_lock);
+ write_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
- spin_unlock(&cm_dev->mad_agent_lock);
+ write_unlock(&cm_dev->mad_agent_lock);
ib_unregister_mad_agent(mad_agent);
ib_port_unregister_client_groups(ib_device, i,
cm_counter_groups);
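
Sketch, not part of the patch: the net effect of the cm.c change for a kernel ULP is that ib_send_cm_mra(), which posted the MRA MAD immediately unless IB_CM_MRA_FLAG_DELAY was set and let the caller choose the service timeout and private data, is replaced by the argument-free ib_prepare_cm_mra(). The MRA itself is now built with the fixed CM_MRA_SETTING timeout and sent only from the duplicate-REQ/REP/LAP handlers, and mad_agent_lock becomes an rwlock so the send paths take it shared. The helper below is hypothetical; only the two API calls come from the patch.

static int handle_cm_rep(struct ib_cm_id *cm_id)
{
	/*
	 * Old API: the caller picked the timeout and could defer the send:
	 *
	 *	return ib_send_cm_mra(cm_id, IB_CM_MRA_FLAG_DELAY | 24, NULL, 0);
	 *
	 * New API: no knobs; the CM core always uses CM_MRA_SETTING
	 * (4.096 us * 2^24, roughly 68.7 s) and only marks the id as
	 * having an MRA pending here.
	 */
	return ib_prepare_cm_mra(cm_id);
}
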
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
index 944d9071245d..4a4987da69d4 100644
--- a/drivers/infiniband/core/cm_trace.h
+++ b/drivers/infiniband/core/cm_trace.h
@@ -229,7 +229,7 @@ DEFINE_CM_ERR_EVENT(send_drep);
DEFINE_CM_ERR_EVENT(dreq_unknown);
DEFINE_CM_ERR_EVENT(send_unknown_rej);
DEFINE_CM_ERR_EVENT(rej_unknown);
-DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(prepare_mra_unknown);
DEFINE_CM_ERR_EVENT(mra_unknown);
DEFINE_CM_ERR_EVENT(qp_init);
DEFINE_CM_ERR_EVENT(qp_rtr);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ab31eefa916b..9b471548e7ae 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -46,7 +46,6 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
-#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 16
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
@@ -146,19 +145,6 @@ struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
}
EXPORT_SYMBOL(rdma_iw_cm_id);
-/**
- * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
- * @res: rdma resource tracking entry pointer
- */
-struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
-{
- struct rdma_id_private *id_priv =
- container_of(res, struct rdma_id_private, res);
-
- return &id_priv->id;
-}
-EXPORT_SYMBOL(rdma_res_to_id);
-
static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
@@ -2214,8 +2200,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
case IB_CM_REP_RECEIVED:
if (state == RDMA_CM_CONNECT &&
(id_priv->id.qp_type != IB_QPT_UD)) {
- trace_cm_send_mra(id_priv);
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ trace_cm_prepare_mra(id_priv);
+ ib_prepare_cm_mra(cm_id);
}
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
@@ -2476,8 +2462,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
conn_id->id.qp_type != IB_QPT_UD) {
- trace_cm_send_mra(cm_id->context);
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ trace_cm_prepare_mra(cm_id->context);
+ ib_prepare_cm_mra(cm_id);
}
mutex_unlock(&conn_id->handler_mutex);
@@ -5245,7 +5231,8 @@ static int cma_netevent_callback(struct notifier_block *self,
neigh->ha, ETH_ALEN))
continue;
cma_id_get(current_id);
- queue_work(cma_wq, &current_id->id.net_work);
+ if (!queue_work(cma_wq, &current_id->id.net_work))
+ cma_id_put(current_id);
}
out:
spin_unlock_irqrestore(&id_table_lock, flags);
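
Sketch, not part of the patch: besides switching to ib_prepare_cm_mra() and dropping the unused rdma_res_to_id() export, the last cma.c hunk closes a reference leak in the netevent path. queue_work() returns false when the work item is already pending, in which case the worker will only ever drop one reference, so the extra one taken just before must be put back. The loop body below is trimmed to that pattern; the names come from the patch.

	cma_id_get(current_id);
	if (!queue_work(cma_wq, &current_id->id.net_work))
		/* already queued: nobody will consume this reference */
		cma_id_put(current_id);
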
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index dc622f3778be..3456d5f3aa47 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -55,7 +55,7 @@ DECLARE_EVENT_CLASS(cma_fsm_class,
DEFINE_CMA_FSM_EVENT(send_rtu);
DEFINE_CMA_FSM_EVENT(send_rej);
-DEFINE_CMA_FSM_EVENT(send_mra);
+DEFINE_CMA_FSM_EVENT(prepare_mra);
DEFINE_CMA_FSM_EVENT(send_sidr_req);
DEFINE_CMA_FSM_EVENT(send_sidr_rep);
DEFINE_CMA_FSM_EVENT(disconnect);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index f4486cbd8f45..62410578dec3 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -368,12 +368,9 @@ EXPORT_SYMBOL(iw_cm_disconnect);
/*
* CM_ID <-- DESTROYING
*
- * Clean up all resources associated with the connection and release
- * the initial reference taken by iw_create_cm_id.
- *
- * Returns true if and only if the last cm_id_priv reference has been dropped.
+ * Clean up all resources associated with the connection.
*/
-static bool destroy_cm_id(struct iw_cm_id *cm_id)
+static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
struct ib_qp *qp;
@@ -442,20 +439,22 @@ static bool destroy_cm_id(struct iw_cm_id *cm_id)
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
}
-
- return iwcm_deref_id(cm_id_priv);
}
/*
- * This function is only called by the application thread and cannot
- * be called by the event thread. The function will wait for all
- * references to be released on the cm_id and then kfree the cm_id
- * object.
+ * Destroy cm_id. If the cm_id still has other references, wait for all
+ * references to be released on the cm_id and then release the initial
+ * reference taken by iw_create_cm_id.
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
- if (!destroy_cm_id(cm_id))
+ struct iwcm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ destroy_cm_id(cm_id);
+ if (refcount_read(&cm_id_priv->refcount) > 1)
flush_workqueue(iwcm_wq);
+ iwcm_deref_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -1035,8 +1034,10 @@ static void cm_work_handler(struct work_struct *_work)
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
- if (ret)
- WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id));
+ if (ret) {
+ destroy_cm_id(&cm_id_priv->id);
+ WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
+ }
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
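
Reference pairing after the iwcm.c change, sketched from the hunks above (the event-side handler, cm_event_handler() in iwcm.c, is not part of this diff):

	/*
	 *   iw_create_cm_id()    refcount = 1   initial reference
	 *   cm_event_handler()   refcount++     one per queued work item
	 *   cm_work_handler()    refcount--     iwcm_deref_id() per item; on a
	 *                                       failed event it now also calls
	 *                                       destroy_cm_id() first
	 *   iw_destroy_cm_id()   destroy_cm_id() tears down QP/listen state
	 *                                       without dropping any reference,
	 *                        flush_workqueue() only if refcount_read() > 1,
	 *                        iwcm_deref_id() drops the initial reference
	 *                                       and frees the id when last
	 */
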
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 8af0619a39cd..b4b10e8a6495 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -158,7 +158,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
- return (void *) ah;
+ return ERR_CAST(ah);
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
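
The mad_rmpp.c hunk, like the uverbs_cmd.c and verbs.c hunks below, replaces an opaque (void *) cast of an error pointer with ERR_CAST() from <linux/err.h>, which documents that the errno-carrying pointer is being propagated across pointer types. A minimal, hypothetical example of the idiom:

#include <linux/err.h>

struct foo;
struct bar;

static struct bar *bar_from_foo(struct foo *f)
{
	if (IS_ERR(f))
		return ERR_CAST(f);	/* keep the encoded errno, change the type */
	/* ... real conversion elided ... */
	return NULL;
}
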
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index c48ef6083020..c752ae9fad6c 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -41,67 +41,72 @@
#include <linux/hugetlb.h>
#include <linux/interval_tree.h>
#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
#include <linux/pagemap.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
-static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
- const struct mmu_interval_notifier_ops *ops)
+static void ib_init_umem_implicit_odp(struct ib_umem_odp *umem_odp)
{
- int ret;
+ umem_odp->is_implicit_odp = 1;
+ umem_odp->umem.is_odp = 1;
+ mutex_init(&umem_odp->umem_mutex);
+}
+
+static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
+ const struct mmu_interval_notifier_ops *ops)
+{
+ struct ib_device *dev = umem_odp->umem.ibdev;
+ size_t page_size = 1UL << umem_odp->page_shift;
+ struct hmm_dma_map *map;
+ unsigned long start;
+ unsigned long end;
+ size_t nr_entries;
+ int ret = 0;
umem_odp->umem.is_odp = 1;
mutex_init(&umem_odp->umem_mutex);
- if (!umem_odp->is_implicit_odp) {
- size_t page_size = 1UL << umem_odp->page_shift;
- unsigned long start;
- unsigned long end;
- size_t ndmas, npfns;
-
- start = ALIGN_DOWN(umem_odp->umem.address, page_size);
- if (check_add_overflow(umem_odp->umem.address,
- (unsigned long)umem_odp->umem.length,
- &end))
- return -EOVERFLOW;
- end = ALIGN(end, page_size);
- if (unlikely(end < page_size))
- return -EOVERFLOW;
-
- ndmas = (end - start) >> umem_odp->page_shift;
- if (!ndmas)
- return -EINVAL;
-
- npfns = (end - start) >> PAGE_SHIFT;
- umem_odp->pfn_list = kvcalloc(
- npfns, sizeof(*umem_odp->pfn_list),
- GFP_KERNEL | __GFP_NOWARN);
- if (!umem_odp->pfn_list)
- return -ENOMEM;
-
- umem_odp->dma_list = kvcalloc(
- ndmas, sizeof(*umem_odp->dma_list),
- GFP_KERNEL | __GFP_NOWARN);
- if (!umem_odp->dma_list) {
+ start = ALIGN_DOWN(umem_odp->umem.address, page_size);
+ if (check_add_overflow(umem_odp->umem.address,
+ (unsigned long)umem_odp->umem.length, &end))
+ return -EOVERFLOW;
+ end = ALIGN(end, page_size);
+ if (unlikely(end < page_size))
+ return -EOVERFLOW;
+
+ nr_entries = (end - start) >> PAGE_SHIFT;
+ if (!(nr_entries * PAGE_SIZE / page_size))
+ return -EINVAL;
+
+ map = &umem_odp->map;
+ if (ib_uses_virt_dma(dev)) {
+ map->pfn_list = kvcalloc(nr_entries, sizeof(*map->pfn_list),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!map->pfn_list)
ret = -ENOMEM;
- goto out_pfn_list;
- }
+ } else
+ ret = hmm_dma_map_alloc(dev->dma_device, map,
+ (end - start) >> PAGE_SHIFT,
+ 1 << umem_odp->page_shift);
+ if (ret)
+ return ret;
- ret = mmu_interval_notifier_insert(&umem_odp->notifier,
- umem_odp->umem.owning_mm,
- start, end - start, ops);
- if (ret)
- goto out_dma_list;
- }
+ ret = mmu_interval_notifier_insert(&umem_odp->notifier,
+ umem_odp->umem.owning_mm, start,
+ end - start, ops);
+ if (ret)
+ goto out_free_map;
return 0;
-out_dma_list:
- kvfree(umem_odp->dma_list);
-out_pfn_list:
- kvfree(umem_odp->pfn_list);
+out_free_map:
+ if (ib_uses_virt_dma(dev))
+ kfree(map->pfn_list);
+ else
+ hmm_dma_map_free(dev->dma_device, map);
return ret;
}
@@ -120,7 +125,6 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
{
struct ib_umem *umem;
struct ib_umem_odp *umem_odp;
- int ret;
if (access & IB_ACCESS_HUGETLB)
return ERR_PTR(-EINVAL);
@@ -132,16 +136,10 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
umem->ibdev = device;
umem->writable = ib_access_writable(access);
umem->owning_mm = current->mm;
- umem_odp->is_implicit_odp = 1;
umem_odp->page_shift = PAGE_SHIFT;
umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
- ret = ib_init_umem_odp(umem_odp, NULL);
- if (ret) {
- put_pid(umem_odp->tgid);
- kfree(umem_odp);
- return ERR_PTR(ret);
- }
+ ib_init_umem_implicit_odp(umem_odp);
return umem_odp;
}
EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
@@ -262,74 +260,41 @@ err_put_pid:
}
EXPORT_SYMBOL(ib_umem_odp_get);
-void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
+static void ib_umem_odp_free(struct ib_umem_odp *umem_odp)
{
+ struct ib_device *dev = umem_odp->umem.ibdev;
+
/*
* Ensure that no more pages are mapped in the umem.
*
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- if (!umem_odp->is_implicit_odp) {
- mutex_lock(&umem_odp->umem_mutex);
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
- mutex_unlock(&umem_odp->umem_mutex);
- mmu_interval_notifier_remove(&umem_odp->notifier);
- kvfree(umem_odp->dma_list);
- kvfree(umem_odp->pfn_list);
- }
- put_pid(umem_odp->tgid);
- kfree(umem_odp);
+ mutex_lock(&umem_odp->umem_mutex);
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
+ mutex_unlock(&umem_odp->umem_mutex);
+ mmu_interval_notifier_remove(&umem_odp->notifier);
+ if (ib_uses_virt_dma(dev))
+ kfree(umem_odp->map.pfn_list);
+ else
+ hmm_dma_map_free(dev->dma_device, &umem_odp->map);
}
-EXPORT_SYMBOL(ib_umem_odp_release);
-/*
- * Map for DMA and insert a single page into the on-demand paging page tables.
- *
- * @umem: the umem to insert the page to.
- * @dma_index: index in the umem to add the dma to.
- * @page: the page struct to map and add.
- * @access_mask: access permissions needed for this page.
- *
- * The function returns -EFAULT if the DMA mapping operation fails.
- *
- */
-static int ib_umem_odp_map_dma_single_page(
- struct ib_umem_odp *umem_odp,
- unsigned int dma_index,
- struct page *page,
- u64 access_mask)
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
- struct ib_device *dev = umem_odp->umem.ibdev;
- dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
-
- if (*dma_addr) {
- /*
- * If the page is already dma mapped it means it went through
- * a non-invalidating trasition, like read-only to writable.
- * Resync the flags.
- */
- *dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
- return 0;
- }
+ if (!umem_odp->is_implicit_odp)
+ ib_umem_odp_free(umem_odp);
- *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(dev, *dma_addr)) {
- *dma_addr = 0;
- return -EFAULT;
- }
- umem_odp->npages++;
- *dma_addr |= access_mask;
- return 0;
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
}
+EXPORT_SYMBOL(ib_umem_odp_release);
/**
* ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
*
* Maps the range passed in the argument to DMA addresses.
- * The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
* Upon success the ODP MR will be locked to let caller complete its device
* page table update.
*
@@ -357,9 +322,6 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
struct hmm_range range = {};
unsigned long timeout;
- if (access_mask == 0)
- return -EINVAL;
-
if (user_virt < ib_umem_start(umem_odp) ||
user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
@@ -385,11 +347,11 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
if (fault) {
range.default_flags = HMM_PFN_REQ_FAULT;
- if (access_mask & ODP_WRITE_ALLOWED_BIT)
+ if (access_mask & HMM_PFN_WRITE)
range.default_flags |= HMM_PFN_REQ_WRITE;
}
- range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+ range.hmm_pfns = &(umem_odp->map.pfn_list[pfn_start_idx]);
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
retry:
@@ -417,22 +379,17 @@ retry:
for (pfn_index = 0; pfn_index < num_pfns;
pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
- if (fault) {
- /*
- * Since we asked for hmm_range_fault() to populate
- * pages it shouldn't return an error entry on success.
- */
- WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
- WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
- } else {
- if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
- WARN_ON(umem_odp->dma_list[dma_index]);
- continue;
- }
- access_mask = ODP_READ_ALLOWED_BIT;
- if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
- }
+ /*
+ * Since we asked for hmm_range_fault() to populate
+ * pages it shouldn't return an error entry on success.
+ */
+ WARN_ON(fault && range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+ WARN_ON(fault && !(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+ if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID))
+ continue;
+
+ if (range.hmm_pfns[pfn_index] & HMM_PFN_DMA_MAPPED)
+ continue;
hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
/* If a hugepage was detected and ODP wasn't set for, the umem
@@ -445,15 +402,6 @@ retry:
__func__, hmm_order, page_shift);
break;
}
-
- ret = ib_umem_odp_map_dma_single_page(
- umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
- access_mask);
- if (ret < 0) {
- ibdev_dbg(umem_odp->umem.ibdev,
- "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- break;
- }
}
/* upon success lock should stay on hold for the callee */
if (!ret)
@@ -473,45 +421,38 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
- dma_addr_t dma_addr;
- dma_addr_t dma;
- int idx;
- u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;
+ u64 addr;
lockdep_assert_held(&umem_odp->umem_mutex);
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
- idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
- dma = umem_odp->dma_list[idx];
-
- /* The access flags guaranteed a valid DMA address in case was NULL */
- if (dma) {
- unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
- struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
-
- dma_addr = dma & ODP_DMA_ADDR_MASK;
- ib_dma_unmap_page(dev, dma_addr,
- BIT(umem_odp->page_shift),
- DMA_BIDIRECTIONAL);
- if (dma & ODP_WRITE_ALLOWED_BIT) {
- struct page *head_page = compound_head(page);
- /*
- * set_page_dirty prefers being called with
- * the page lock. However, MMU notifiers are
- * called sometimes with and sometimes without
- * the lock. We rely on the umem_mutex instead
- * to prevent other mmu notifiers from
- * continuing and allowing the page mapping to
- * be removed.
- */
- set_page_dirty(head_page);
- }
- umem_odp->dma_list[idx] = 0;
- umem_odp->npages--;
+ u64 offset = addr - ib_umem_start(umem_odp);
+ size_t idx = offset >> umem_odp->page_shift;
+ unsigned long pfn = umem_odp->map.pfn_list[idx];
+
+ if (!hmm_dma_unmap_pfn(dev->dma_device, &umem_odp->map, idx))
+ goto clear;
+
+ if (pfn & HMM_PFN_WRITE) {
+ struct page *page = hmm_pfn_to_page(pfn);
+ struct page *head_page = compound_head(page);
+ /*
+ * set_page_dirty prefers being called with
+ * the page lock. However, MMU notifiers are
+ * called sometimes with and sometimes without
+ * the lock. We rely on the umem_mutex instead
+ * to prevent other mmu notifiers from
+ * continuing and allowing the page mapping to
+ * be removed.
+ */
+ set_page_dirty(head_page);
}
+ umem_odp->npages--;
+clear:
+ umem_odp->map.pfn_list[idx] &= ~HMM_PFN_FLAGS;
}
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
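
Sketch, not part of the patch: the umem_odp.c rewrite removes the parallel dma_list and the ODP_READ/WRITE_ALLOWED_BIT encoding. Per-page state now lives entirely in umem_odp->map.pfn_list, and allocation and teardown go through the hmm-dma helpers (hmm_dma_map_alloc()/hmm_dma_map_free()/hmm_dma_unmap_pfn()) for real DMA devices, with a plain kvcalloc'd pfn list for ib_uses_virt_dma() devices. The helper below is hypothetical; the flag names and fields come from the hunks above.

static bool odp_pfn_is_writable_mapping(struct ib_umem_odp *umem_odp, size_t idx)
{
	unsigned long pfn = umem_odp->map.pfn_list[idx];

	return (pfn & HMM_PFN_VALID) &&		/* populated by hmm_range_fault() */
	       (pfn & HMM_PFN_DMA_MAPPED) &&	/* mapped via the hmm-dma helpers */
	       (pfn & HMM_PFN_WRITE);		/* writable: needs set_page_dirty() on unmap */
}
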
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3c3bb670c805..bc9fe3ceca4d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -193,7 +193,7 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
fd, attrs);
if (IS_ERR(uobj))
- return (void *)uobj;
+ return ERR_CAST(uobj);
uverbs_uobject_get(uobj);
uobj_put_read(uobj);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c5e78bbefbd0..75fde0fe9989 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -572,7 +572,7 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
GFP_KERNEL : GFP_ATOMIC);
if (IS_ERR(slave)) {
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
- return (void *)slave;
+ return ERR_CAST(slave);
}
ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
rdma_lag_put_ah_roce_slave(slave);