Add support for DSCP configuration. For DSCP, get dscp-prio mapping
via hns3 nic driver api .get_dscp_prio() and fill the SL (in WQE for
UD or in QPC for RC) with the priority value. The prio-tc mapping is
configured to HW by hns3 nic driver. HW will select a corresponding
TC according to SL and the prio-tc mapping.
Signed-off-by: Junxian Huang <[email protected]>
---
drivers/infiniband/hw/hns/hns_roce_ah.c | 32 +++++---
drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 86 ++++++++++++++++-----
drivers/infiniband/hw/hns/hns_roce_qp.c | 13 ++++
include/uapi/rdma/hns-abi.h | 9 ++-
5 files changed, 117 insertions(+), 29 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index b4209b6aed8d..91f7fe0f3235 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -59,8 +59,11 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
struct hns_roce_ib_create_ah_resp resp = {};
struct hns_roce_ah *ah = to_hr_ah(ibah);
- int ret = 0;
+ u8 tclass = get_tclass(grh);
+ u8 priority = 0;
+ u8 tc_mode = 0;
u32 max_sl;
+ int ret;
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata)
return -EOPNOTSUPP;
@@ -74,16 +77,23 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
ah->av.hop_limit = grh->hop_limit;
ah->av.flowlabel = grh->flow_label;
ah->av.udp_sport = get_ah_udp_sport(ah_attr);
- ah->av.tclass = get_tclass(grh);
-
- ah->av.sl = rdma_ah_get_sl(ah_attr);
- max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
- if (unlikely(ah->av.sl > max_sl)) {
- ibdev_err_ratelimited(&hr_dev->ib_dev,
- "failed to set sl, sl (%u) shouldn't be larger than %u.\n",
- ah->av.sl, max_sl);
+ ah->av.tclass = tclass;
+
+ ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+
+ if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return ret;
+
+ if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ ah->av.sl = priority;
+ else
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
+
+ if (!check_sl_valid(hr_dev, ah->av.sl))
return -EINVAL;
- }
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
@@ -99,6 +109,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
}
if (udata) {
+ resp.priority = ah->av.sl;
+ resp.tc_mode = tc_mode;
memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
ret = ib_copy_to_udata(udata, &resp,
min(udata->outlen, sizeof(resp)));
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index c3cbd0a494bf..78b4d19ff848 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -645,6 +645,8 @@ struct hns_roce_qp {
struct hns_user_mmap_entry *dwqe_mmap_entry;
u32 config;
enum hns_roce_cong_type cong_type;
+ u8 tc_mode;
+ u8 priority;
};
struct hns_roce_ib_iboe {
@@ -950,6 +952,8 @@ struct hns_roce_hw {
int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
u64 *stats, u32 port, int *hw_counters);
+ int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority);
const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
@@ -1292,4 +1296,6 @@ struct hns_user_mmap_entry *
hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
size_t length,
enum hns_roce_mmap_type mmap_type);
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl);
+
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index ba7ae792d279..4de463e787d4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -443,10 +443,6 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
-
- if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
- return -EINVAL;
-
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
ud_sq_wqe->sgid_index = ah->av.gid_index;
@@ -4828,6 +4824,70 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
return 0;
}
+static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!ops->get_dscp_prio) /* the ae ops table may not provide this hook */
+ return -EOPNOTSUPP;
+ /* Delegate the lookup; tc_mode/priority are handed straight to the hook. */
+ return ops->get_dscp_prio(handle, dscp, tc_mode, priority);
+}
+/* Validate @sl against the device's SL limit; logs and returns false if over. */
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl)
+{
+ u32 max_sl; /* lower of MAX_SERVICE_LEVEL and caps.sl_num - 1 */
+
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(sl > max_sl)) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to set SL(%u). Shouldn't be larger than %u.\n",
+ sl, max_sl);
+ return false;
+ }
+
+ return true;
+}
+/* Choose the QP's SL (DSCP priority or ah_attr SL) and write it to the QPC. */
+static int hns_roce_set_sl(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 sl_num; /* NOTE(review): declared but never used in this function */
+ int ret;
+
+ ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
+ &hr_qp->tc_mode, &hr_qp->priority);
+ if (ret && ret != -EOPNOTSUPP && /* -EOPNOTSUPP is tolerated */
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to get dscp, ret = %d.\n", ret);
+ return ret;
+ }
+ /* The DSCP priority is used only in DSCP mode with a UDP-encap GID. */
+ if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ hr_qp->sl = hr_qp->priority;
+ else
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ if (!check_sl_valid(hr_dev, hr_qp->sl))
+ return -EINVAL;
+
+ hr_reg_write(context, QPC_SL, hr_qp->sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+
+ return 0;
+}
+
static int hns_roce_v2_set_path(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4843,25 +4903,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
int is_roce_protocol;
u16 vlan_id = 0xffff;
bool is_udp = false;
- u32 max_sl;
u8 ib_port;
u8 hr_port;
int ret;
- max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
- if (unlikely(sl > max_sl)) {
- ibdev_err_ratelimited(ibdev,
- "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n",
- sl, max_sl);
- return -EINVAL;
- }
-
/*
* If free_mr_en of qp is set, it means that this qp comes from
* free mr. This qp will perform the loopback operation.
* In the loopback scenario, only sl needs to be set.
*/
if (hr_qp->free_mr_en) {
+ if (!check_sl_valid(hr_dev, sl))
+ return -EINVAL;
hr_reg_write(context, QPC_SL, sl);
hr_reg_clear(qpc_mask, QPC_SL);
hr_qp->sl = sl;
@@ -4931,11 +4984,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
- hr_qp->sl = sl;
- hr_reg_write(context, QPC_SL, hr_qp->sl);
- hr_reg_clear(qpc_mask, QPC_SL);
-
- return 0;
+ return hns_roce_set_sl(ibqp, attr, context, qpc_mask);
}
static bool check_qp_state(enum ib_qp_state cur_state,
@@ -6735,6 +6784,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.query_srqc = hns_roce_v2_query_srqc,
.query_sccc = hns_roce_v2_query_sccc,
.query_hw_counter = hns_roce_hw_v2_query_counter,
+ .get_dscp = hns_roce_hw_v2_get_dscp,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f35a66325d9a..697230f964b1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1386,6 +1386,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ib_modify_qp_resp resp = {};
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
enum ib_qp_state cur_state, new_state;
int ret = -EINVAL;
@@ -1427,6 +1428,18 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
new_state, udata);
+ if (ret)
+ goto out;
+
+ if (udata && udata->outlen) {
+ resp.tc_mode = hr_qp->tc_mode;
+ resp.priority = hr_qp->sl;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to copy modify qp resp.\n");
+ }
out:
mutex_unlock(&hr_qp->mutex);
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 158670da2b2a..94e861870e27 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -109,6 +109,12 @@ struct hns_roce_ib_create_qp_resp {
__aligned_u64 dwqe_mmap_key;
};
+struct hns_roce_ib_modify_qp_resp { /* returned to userspace by modify_qp */
+ __u8 tc_mode; /* copied from the QP's tc_mode */
+ __u8 priority; /* copied from the QP's SL */
+ __u8 reserved[6];
+};
+
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
@@ -143,7 +149,8 @@ struct hns_roce_ib_alloc_pd_resp {
struct hns_roce_ib_create_ah_resp {
__u8 dmac[6];
- __u8 reserved[2];
+ __u8 priority;
+ __u8 tc_mode;
};
#endif /* HNS_ABI_USER_H */
--
2.30.0
On Fri, Mar 15, 2024 at 05:35:51PM +0800, Junxian Huang wrote:
> Add support for DSCP configuration. For DSCP, get dscp-prio mapping
> via hns3 nic driver api .get_dscp_prio() and fill the SL (in WQE for
> UD or in QPC for RC) with the priority value. The prio-tc mapping is
> configured to HW by hns3 nic driver. HW will select a corresponding
> TC according to SL and the prio-tc mapping.
>
> Signed-off-by: Junxian Huang <[email protected]>
> ---
> drivers/infiniband/hw/hns/hns_roce_ah.c | 32 +++++---
> drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++
> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 86 ++++++++++++++++-----
> drivers/infiniband/hw/hns/hns_roce_qp.c | 13 ++++
> include/uapi/rdma/hns-abi.h | 9 ++-
> 5 files changed, 117 insertions(+), 29 deletions(-)
1. What is TC mode?
2. Did you post rdma-core PR?
Thanks
On 2024/4/1 19:48, Leon Romanovsky wrote:
> On Fri, Mar 15, 2024 at 05:35:51PM +0800, Junxian Huang wrote:
>> Add support for DSCP configuration. For DSCP, get dscp-prio mapping
>> via hns3 nic driver api .get_dscp_prio() and fill the SL (in WQE for
>> UD or in QPC for RC) with the priority value. The prio-tc mapping is
>> configured to HW by hns3 nic driver. HW will select a corresponding
>> TC according to SL and the prio-tc mapping.
>>
>> Signed-off-by: Junxian Huang <[email protected]>
>> ---
>> drivers/infiniband/hw/hns/hns_roce_ah.c | 32 +++++---
>> drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++
>> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 86 ++++++++++++++++-----
>> drivers/infiniband/hw/hns/hns_roce_qp.c | 13 ++++
>> include/uapi/rdma/hns-abi.h | 9 ++-
>> 5 files changed, 117 insertions(+), 29 deletions(-)
>
> 1. What is TC mode?
TC mode indicates whether the HW is configured as DSCP mode or VLAN priority
mode currently.
> 2. Did you post rdma-core PR?
Not yet. I meant to wait until this patch was applied. Should I post it
right now?
Junxian
>
> Thanks
On Mon, Apr 01, 2024 at 09:25:39PM +0800, Junxian Huang wrote:
>
>
> On 2024/4/1 19:48, Leon Romanovsky wrote:
> > On Fri, Mar 15, 2024 at 05:35:51PM +0800, Junxian Huang wrote:
> >> Add support for DSCP configuration. For DSCP, get dscp-prio mapping
> >> via hns3 nic driver api .get_dscp_prio() and fill the SL (in WQE for
> >> UD or in QPC for RC) with the priority value. The prio-tc mapping is
> >> configured to HW by hns3 nic driver. HW will select a corresponding
> >> TC according to SL and the prio-tc mapping.
> >>
> >> Signed-off-by: Junxian Huang <[email protected]>
> >> ---
> >> drivers/infiniband/hw/hns/hns_roce_ah.c | 32 +++++---
> >> drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++
> >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 86 ++++++++++++++++-----
> >> drivers/infiniband/hw/hns/hns_roce_qp.c | 13 ++++
> >> include/uapi/rdma/hns-abi.h | 9 ++-
> >> 5 files changed, 117 insertions(+), 29 deletions(-)
> >
> > 1. What is TC mode?
>
> TC mode indicates whether the HW is configured as DSCP mode or VLAN priority
> mode currently.
>
> > 2. Did you post rdma-core PR?
>
> Not yet. I was meant to wait until this patch is applied. Should I post it
> right now?
Yes, for any UAPI changes, we require an rdma-core PR.
Thanks
>
> Junxian
>
> >
> > Thanks
On 2024/4/2 2:12, Leon Romanovsky wrote:
> On Mon, Apr 01, 2024 at 09:25:39PM +0800, Junxian Huang wrote:
>>
>>
>> On 2024/4/1 19:48, Leon Romanovsky wrote:
>>> On Fri, Mar 15, 2024 at 05:35:51PM +0800, Junxian Huang wrote:
>>>> Add support for DSCP configuration. For DSCP, get dscp-prio mapping
>>>> via hns3 nic driver api .get_dscp_prio() and fill the SL (in WQE for
>>>> UD or in QPC for RC) with the priority value. The prio-tc mapping is
>>>> configured to HW by hns3 nic driver. HW will select a corresponding
>>>> TC according to SL and the prio-tc mapping.
>>>>
>>>> Signed-off-by: Junxian Huang <[email protected]>
>>>> ---
>>>> drivers/infiniband/hw/hns/hns_roce_ah.c | 32 +++++---
>>>> drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++
>>>> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 86 ++++++++++++++++-----
>>>> drivers/infiniband/hw/hns/hns_roce_qp.c | 13 ++++
>>>> include/uapi/rdma/hns-abi.h | 9 ++-
>>>> 5 files changed, 117 insertions(+), 29 deletions(-)
>>>
>>> 1. What is TC mode?
>>
>> TC mode indicates whether the HW is configured as DSCP mode or VLAN priority
>> mode currently.
>>
>>> 2. Did you post rdma-core PR?
>>
>> Not yet. I was meant to wait until this patch is applied. Should I post it
>> right now?
>
> Yes, for any UAPI changes, we require to have rdma-core PR.
>
> Thanks
>
PR has been posted.
Thanks,
Junxian
>>
>> Junxian
>>
>>>
>>> Thanks
On Fri, 15 Mar 2024 17:35:51 +0800, Junxian Huang wrote:
> Add support for DSCP configuration. For DSCP, get dscp-prio mapping
> via hns3 nic driver api .get_dscp_prio() and fill the SL (in WQE for
> UD or in QPC for RC) with the priority value. The prio-tc mapping is
> configured to HW by hns3 nic driver. HW will select a corresponding
> TC according to SL and the prio-tc mapping.
>
>
> [...]
Applied, thanks!
[1/1] RDMA/hns: Support DSCP
https://git.kernel.org/rdma/rdma/c/c3236d538646c8
Best regards,
--
Leon Romanovsky <[email protected]>