diff options
author | Shiraz Saleem <shirazsaleem@microsoft.com> | 2025-02-05 02:32:07 -0800 |
---|---|---|
committer | Leon Romanovsky <leon@kernel.org> | 2025-02-06 04:11:01 -0500 |
commit | 79bccd746132afe12b8bc3785e7b74b90440060d (patch) | |
tree | c52a0faac9817975d6b09aeffc6f80db41c543dc | |
parent | cd3c5ddf823016b0c670d1965c5d312b3cf8bb7b (diff) |
RDMA/mana_ib: Add port statistics support
Implement alloc_hw_port_stats and get_hw_stats APIs to support querying
MANA VF port level statistics from rdma stat tool.
Example output from rdma stat tool:
$rdma statistic show link mana_0/1 -p
link mana_0/1
requester_timeout 45
requester_oos_nak 0
requester_rnr_nak 0
responder_rnr_nak 0
responder_oos 0
responder_dup_request 0
requester_implicit_nak 0
requester_readresp_psn_mismatch 0
nak_inv_req 0
nak_access_error 0
nak_opp_error 0
nak_inv_read 0
responder_local_len_error 0
requestor_local_prot_error 0
responder_rem_access_error 0
responder_local_qp_error 0
responder_malformed_wqe 0
general_hw_error 6
requester_rnr_nak_retries_exceeded 0
requester_retries_exceeded 5
total_fatal_error 6
received_cnps 0
num_qps_congested 0
rate_inc_events 0
num_qps_recovered 0
current_rate 100000
Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://patch.msgid.link/1738751527-15517-1-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Long Li <longli@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
-rw-r--r-- | drivers/infiniband/hw/mana/Makefile | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mana/counters.c | 105 | ||||
-rw-r--r-- | drivers/infiniband/hw/mana/counters.h | 44 | ||||
-rw-r--r-- | drivers/infiniband/hw/mana/device.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/mana/mana_ib.h | 37 |
5 files changed, 194 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile index 79426e725122..921c05e08b11 100644 --- a/drivers/infiniband/hw/mana/Makefile +++ b/drivers/infiniband/hw/mana/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o -mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c new file mode 100644 index 000000000000..e533ce21013d --- /dev/null +++ b/drivers/infiniband/hw/mana/counters.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. + */ + +#include "counters.h" + +static const struct rdma_stat_desc mana_ib_port_stats_desc[] = { + [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout", + [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak", + [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak", + [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak", + [MANA_IB_RESPONDER_OOS].name = "responder_oos", + [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request", + [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak", + [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch", + [MANA_IB_NAK_INV_REQ].name = "nak_inv_req", + [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error", + [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error", + [MANA_IB_NAK_INV_READ].name = "nak_inv_read", + [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error", + [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error", + [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error", + [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error", + [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe", + [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error", + [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded", + [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded", + [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error", + [MANA_IB_RECEIVED_CNPS].name = "received_cnps", + [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested", + [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events", + [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered", + [MANA_IB_CURRENT_RATE].name = "current_rate", +}; + +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num) +{ + return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc, + ARRAY_SIZE(mana_ib_port_stats_desc), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, + ib_dev); + struct mana_rnic_query_vf_cntrs_resp resp = {}; + struct mana_rnic_query_vf_cntrs_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS, + sizeof(req), sizeof(resp)); + req.hdr.dev_id = mdev->gdma_dev->dev_id; + req.adapter = mdev->adapter_handle; + + err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req, + sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d", + err); + return err; + } + + stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout; + stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak; + stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak; + stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak; + stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos; + stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request; + stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] = + resp.requester_implicit_nak; + stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] = + resp.requester_readresp_psn_mismatch; + stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req; + stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err; + stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err; + stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read; + stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] = + resp.responder_local_len_err; + stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] = + resp.requestor_local_prot_err; + stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] = + resp.responder_rem_access_err; + stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] = + resp.responder_local_qp_err; + stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] = + resp.responder_malformed_wqe; + stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err; + stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] = + resp.requester_rnr_nak_retries_exceeded; + stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] = + resp.requester_retries_exceeded; + stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err; + + stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps; + stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested; + stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events; + stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered; + stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate; + + return ARRAY_SIZE(mana_ib_port_stats_desc); +} diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h new file mode 100644 index 000000000000..7ff92d27f6c3 --- /dev/null +++ b/drivers/infiniband/hw/mana/counters.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Microsoft Corporation. All rights reserved. + */ + +#ifndef _COUNTERS_H_ +#define _COUNTERS_H_ + +#include "mana_ib.h" + +enum mana_ib_port_counters { + MANA_IB_REQUESTER_TIMEOUT, + MANA_IB_REQUESTER_OOS_NAK, + MANA_IB_REQUESTER_RNR_NAK, + MANA_IB_RESPONDER_RNR_NAK, + MANA_IB_RESPONDER_OOS, + MANA_IB_RESPONDER_DUP_REQUEST, + MANA_IB_REQUESTER_IMPLICIT_NAK, + MANA_IB_REQUESTER_READRESP_PSN_MISMATCH, + MANA_IB_NAK_INV_REQ, + MANA_IB_NAK_ACCESS_ERR, + MANA_IB_NAK_OPP_ERR, + MANA_IB_NAK_INV_READ, + MANA_IB_RESPONDER_LOCAL_LEN_ERR, + MANA_IB_REQUESTOR_LOCAL_PROT_ERR, + MANA_IB_RESPONDER_REM_ACCESS_ERR, + MANA_IB_RESPONDER_LOCAL_QP_ERR, + MANA_IB_RESPONDER_MALFORMED_WQE, + MANA_IB_GENERAL_HW_ERR, + MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED, + MANA_IB_REQUESTER_RETRIES_EXCEEDED, + MANA_IB_TOTAL_FATAL_ERR, + MANA_IB_RECEIVED_CNPS, + MANA_IB_NUM_QPS_CONGESTED, + MANA_IB_RATE_INC_EVENTS, + MANA_IB_NUM_QPS_RECOVERED, + MANA_IB_CURRENT_RATE, +}; + +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num); +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index); +#endif /* _COUNTERS_H_ */ diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 0a7553f819ba..640a2c96b5ca 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -59,6 +59,11 @@ static const struct ib_device_ops mana_ib_dev_ops = { ib_ind_table), }; +static const struct ib_device_ops mana_ib_stats_ops = { + .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats, + .get_hw_stats = mana_ib_get_hw_stats, +}; + static int mana_ib_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -124,6 +129,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, goto deregister_device; } + ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops); + ret = mana_ib_create_eqs(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index ad716a90c7be..7e342e070b70 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -15,6 +15,7 @@ #include <net/mana/mana.h> #include "shadow_queue.h" +#include "counters.h" #define PAGE_SZ_BM \ (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \ @@ -205,6 +206,7 @@ enum mana_ib_command_code { MANA_IB_CREATE_RC_QP = 0x3000a, MANA_IB_DESTROY_RC_QP = 0x3000b, MANA_IB_SET_QP_STATE = 0x3000d, + MANA_IB_QUERY_VF_COUNTERS = 0x30022, }; struct mana_ib_query_adapter_caps_req { @@ -476,6 +478,41 @@ struct mana_rdma_cqe { }; }; /* HW DATA */ +struct mana_rnic_query_vf_cntrs_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; +}; /* HW Data */ + +struct mana_rnic_query_vf_cntrs_resp { + struct gdma_resp_hdr hdr; + u64 requester_timeout; + u64 requester_oos_nak; + u64 requester_rnr_nak; + u64 responder_rnr_nak; + u64 responder_oos; + u64 responder_dup_request; + u64 requester_implicit_nak; + u64 requester_readresp_psn_mismatch; + u64 nak_inv_req; + u64 nak_access_err; + u64 nak_opp_err; + u64 nak_inv_read; + u64 responder_local_len_err; + u64 requestor_local_prot_err; + u64 responder_rem_access_err; + u64 responder_local_qp_err; + u64 responder_malformed_wqe; + u64 general_hw_err; + u64 requester_rnr_nak_retries_exceeded; + u64 requester_retries_exceeded; + u64 total_fatal_err; + u64 received_cnps; + u64 num_qps_congested; + u64 rate_inc_events; + u64 num_qps_recovered; + u64 current_rate; +}; /* HW Data */ + static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev) { return mdev->gdma_dev->gdma_context; |