Here are 3 patches involving function resource configuration.
1. #1: The first patch supports getting xrcd num from firmware.
2. #2: The second patch removes a redundant configuration in driver,
which is now handled by firmware.
3. #3: The third patch adds validity checks for function resource values
and adjusts the invalid ones.
V2 removes 'inline' before function names in the third patch.
Junxian Huang (2):
RDMA/hns: Remove VF extend configuration
RDMA/hns: Add check and adjust for function resource values
Luoyouming (1):
RDMA/hns: support get xrcd num from firmware
drivers/infiniband/hw/hns/hns_roce_device.h | 1 -
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 194 ++++++++++++--------
drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 50 +++--
3 files changed, 157 insertions(+), 88 deletions(-)
--
2.30.0
Currently, the RoCE driver gets function resource values from firmware
without validity checks. As these resources are mostly related to memory,
an invalid value may lead to serious consequences such as a kernel panic.
This patch adds checks for these resource values and adjusts the invalid
ones.
Signed-off-by: Junxian Huang <[email protected]>
---
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 115 ++++++++++++++++++++-
drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 37 +++++++
2 files changed, 148 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c4b92d8bd98a..f5649fd25042 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1650,6 +1650,97 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
+static const struct hns_roce_bt_num {
+ u32 res_offset;
+ u32 min;
+ u32 max;
+ enum hns_roce_res_invalid_flag invalid_flag;
+ enum hns_roce_res_revision revision;
+ bool vf_support;
+} bt_num_table[] = {
+ {RES_OFFSET_IN_CAPS(qpc_bt_num), 1,
+ MAX_QPC_BT_NUM, QPC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
+ {RES_OFFSET_IN_CAPS(srqc_bt_num), 1,
+ MAX_SRQC_BT_NUM, SRQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
+ {RES_OFFSET_IN_CAPS(cqc_bt_num), 1,
+ MAX_CQC_BT_NUM, CQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
+ {RES_OFFSET_IN_CAPS(mpt_bt_num), 1,
+ MAX_MPT_BT_NUM, MPT_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
+ {RES_OFFSET_IN_CAPS(sl_num), 1,
+ MAX_SL_NUM, QID_NUM_INVALID_FLAG, RES_FOR_ALL, true},
+ {RES_OFFSET_IN_CAPS(sccc_bt_num), 1,
+ MAX_SCCC_BT_NUM, SCCC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
+ {RES_OFFSET_IN_CAPS(qpc_timer_bt_num), 1,
+ MAX_QPC_TIMER_BT_NUM, QPC_TIMER_BT_NUM_INVALID_FLAG,
+ RES_FOR_ALL, false},
+ {RES_OFFSET_IN_CAPS(cqc_timer_bt_num), 1,
+ MAX_CQC_TIMER_BT_NUM, CQC_TIMER_BT_NUM_INVALID_FLAG,
+ RES_FOR_ALL, false},
+ {RES_OFFSET_IN_CAPS(gmv_bt_num), 1,
+ MAX_GMV_BT_NUM, GMV_BT_NUM_INVALID_FLAG,
+ RES_FOR_HIP09, true},
+ {RES_OFFSET_IN_CAPS(smac_bt_num), 1,
+ MAX_SMAC_BT_NUM, SMAC_BT_NUM_INVALID_FLAG,
+ RES_FOR_HIP08, true},
+ {RES_OFFSET_IN_CAPS(sgid_bt_num), 1,
+ MAX_SGID_BT_NUM, SGID_BT_NUM_INVALID_FLAG,
+ RES_FOR_HIP08, true},
+};
+
+static bool check_res_is_supported(struct hns_roce_dev *hr_dev,
+				   const struct hns_roce_bt_num *bt_num_entry)
+{
+	if (!bt_num_entry->vf_support && hr_dev->is_vf)
+		return false;
+
+	if (bt_num_entry->revision == RES_FOR_HIP09 &&
+	    hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08)
+		return false;
+
+	if (bt_num_entry->revision == RES_FOR_HIP08 &&
+	    hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		return false;
+
+	return true;
+}
+
+static void adjust_eqc_bt_num(struct hns_roce_caps *caps, u16 *invalid_flag)
+{
+ if (caps->eqc_bt_num < caps->num_comp_vectors + caps->num_aeq_vectors ||
+ caps->eqc_bt_num > MAX_EQC_BT_NUM) {
+ caps->eqc_bt_num = caps->eqc_bt_num > MAX_EQC_BT_NUM ?
+ MAX_EQC_BT_NUM : caps->num_comp_vectors +
+ caps->num_aeq_vectors;
+ *invalid_flag |= 1 << EQC_BT_NUM_INVALID_FLAG;
+ }
+}
+
+static u16 adjust_res_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ u16 invalid_flag = 0;
+ u32 min, max;
+ u32 *res;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bt_num_table); i++) {
+ if (!check_res_is_supported(hr_dev, &bt_num_table[i]))
+ continue;
+
+ res = (u32 *)((void *)caps + bt_num_table[i].res_offset);
+ min = bt_num_table[i].min;
+ max = bt_num_table[i].max;
+ if (*res < min || *res > max) {
+ *res = *res < min ? min : max;
+ invalid_flag |= 1 << bt_num_table[i].invalid_flag;
+ }
+ }
+
+ adjust_eqc_bt_num(caps, &invalid_flag);
+
+ return invalid_flag;
+}
+
static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
{
struct hns_roce_cmq_desc desc[2];
@@ -1730,11 +1821,19 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
}
ret = load_pf_timer_res_caps(hr_dev);
- if (ret)
+ if (ret) {
dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
ret);
+ return ret;
+ }
- return ret;
+ ret = adjust_res_caps(hr_dev);
+ if (ret)
+ dev_warn(dev,
+ "invalid resource values have been adjusted, invalid_flag = 0x%x.\n",
+ ret);
+
+ return 0;
}
static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
@@ -1743,10 +1842,18 @@ static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
int ret;
ret = load_func_res_caps(hr_dev, true);
- if (ret)
+ if (ret) {
dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret);
+ return ret;
+ }
- return ret;
+ ret = adjust_res_caps(hr_dev);
+ if (ret)
+ dev_warn(dev,
+ "invalid resource values have been adjusted, invalid_flag = 0x%x.\n",
+ ret);
+
+ return 0;
}
static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index d9693f6cc802..c2d46383c88c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -972,6 +972,43 @@ struct hns_roce_func_clear {
#define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0)
#define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16)
+enum hns_roce_res_invalid_flag {
+ QPC_BT_NUM_INVALID_FLAG,
+ SRQC_BT_NUM_INVALID_FLAG,
+ CQC_BT_NUM_INVALID_FLAG,
+ MPT_BT_NUM_INVALID_FLAG,
+ EQC_BT_NUM_INVALID_FLAG,
+ SMAC_BT_NUM_INVALID_FLAG,
+ SGID_BT_NUM_INVALID_FLAG,
+ QID_NUM_INVALID_FLAG,
+ SCCC_BT_NUM_INVALID_FLAG,
+ GMV_BT_NUM_INVALID_FLAG,
+ QPC_TIMER_BT_NUM_INVALID_FLAG,
+ CQC_TIMER_BT_NUM_INVALID_FLAG,
+};
+
+enum hns_roce_res_revision {
+ RES_FOR_HIP08,
+ RES_FOR_HIP09,
+ RES_FOR_ALL,
+};
+
+#define RES_OFFSET_IN_CAPS(res) \
+ (offsetof(struct hns_roce_caps, res))
+
+#define MAX_QPC_BT_NUM 2048
+#define MAX_SRQC_BT_NUM 512
+#define MAX_CQC_BT_NUM 512
+#define MAX_MPT_BT_NUM 512
+#define MAX_EQC_BT_NUM 512
+#define MAX_SMAC_BT_NUM 256
+#define MAX_SGID_BT_NUM 256
+#define MAX_SL_NUM 8
+#define MAX_SCCC_BT_NUM 512
+#define MAX_GMV_BT_NUM 256
+#define MAX_QPC_TIMER_BT_NUM 1728
+#define MAX_CQC_TIMER_BT_NUM 1600
+
/*
* Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES
* and HNS_ROCE_OPC_ALLOC_VF_RES
--
2.30.0
On Mon, Jul 17, 2023 at 02:03:40PM +0800, Junxian Huang wrote:
> Currently, RoCE driver gets function resource values from firmware
> without validity check.
Kernel trusts devices underneath, otherwise why should we stop with
capabilities? Let's check all PCI transactions and verify any response
from FW too.
> As these resources are mostly related to memory,
> an invalid value may lead to serious consequence such as kernel panic.
>
> This patch adds check for these resource values and adjusts the invalid
> ones.
These are FW bugs which should be fixed.
Thanks
>
> Signed-off-by: Junxian Huang <[email protected]>
> ---
> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 115 ++++++++++++++++++++-
> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 37 +++++++
> 2 files changed, 148 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index c4b92d8bd98a..f5649fd25042 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1650,6 +1650,97 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
> return hns_roce_cmq_send(hr_dev, &desc, 1);
> }
>
> +static const struct hns_roce_bt_num {
> + u32 res_offset;
> + u32 min;
> + u32 max;
> + enum hns_roce_res_invalid_flag invalid_flag;
> + enum hns_roce_res_revision revision;
> + bool vf_support;
> +} bt_num_table[] = {
> + {RES_OFFSET_IN_CAPS(qpc_bt_num), 1,
> + MAX_QPC_BT_NUM, QPC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(srqc_bt_num), 1,
> + MAX_SRQC_BT_NUM, SRQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(cqc_bt_num), 1,
> + MAX_CQC_BT_NUM, CQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(mpt_bt_num), 1,
> + MAX_MPT_BT_NUM, MPT_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(sl_num), 1,
> + MAX_SL_NUM, QID_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(sccc_bt_num), 1,
> + MAX_SCCC_BT_NUM, SCCC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(qpc_timer_bt_num), 1,
> + MAX_QPC_TIMER_BT_NUM, QPC_TIMER_BT_NUM_INVALID_FLAG,
> + RES_FOR_ALL, false},
> + {RES_OFFSET_IN_CAPS(cqc_timer_bt_num), 1,
> + MAX_CQC_TIMER_BT_NUM, CQC_TIMER_BT_NUM_INVALID_FLAG,
> + RES_FOR_ALL, false},
> + {RES_OFFSET_IN_CAPS(gmv_bt_num), 1,
> + MAX_GMV_BT_NUM, GMV_BT_NUM_INVALID_FLAG,
> + RES_FOR_HIP09, true},
> + {RES_OFFSET_IN_CAPS(smac_bt_num), 1,
> + MAX_SMAC_BT_NUM, SMAC_BT_NUM_INVALID_FLAG,
> + RES_FOR_HIP08, true},
> + {RES_OFFSET_IN_CAPS(sgid_bt_num), 1,
> + MAX_SGID_BT_NUM, SGID_BT_NUM_INVALID_FLAG,
> + RES_FOR_HIP08, true},
> +};
> +
> +static bool check_res_is_supported(struct hns_roce_dev *hr_dev,
> + struct hns_roce_bt_num *bt_num_entry)
> +{
> + if (!bt_num_entry->vf_support && hr_dev->is_vf)
> + return false;
> +
> + if (bt_num_entry->revision == RES_FOR_HIP09 &&
> + hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08)
> + return false;
> +
> + if (bt_num_entry->revision == RES_FOR_HIP08 &&
> + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
> + return false;
> +
> + return true;
> +}
> +
> +static void adjust_eqc_bt_num(struct hns_roce_caps *caps, u16 *invalid_flag)
> +{
> + if (caps->eqc_bt_num < caps->num_comp_vectors + caps->num_aeq_vectors ||
> + caps->eqc_bt_num > MAX_EQC_BT_NUM) {
> + caps->eqc_bt_num = caps->eqc_bt_num > MAX_EQC_BT_NUM ?
> + MAX_EQC_BT_NUM : caps->num_comp_vectors +
> + caps->num_aeq_vectors;
> + *invalid_flag |= 1 << EQC_BT_NUM_INVALID_FLAG;
> + }
> +}
> +
> +static u16 adjust_res_caps(struct hns_roce_dev *hr_dev)
> +{
> + struct hns_roce_caps *caps = &hr_dev->caps;
> + u16 invalid_flag = 0;
> + u32 min, max;
> + u32 *res;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(bt_num_table); i++) {
> + if (!check_res_is_supported(hr_dev, &bt_num_table[i]))
> + continue;
> +
> + res = (u32 *)((void *)caps + bt_num_table[i].res_offset);
> + min = bt_num_table[i].min;
> + max = bt_num_table[i].max;
> + if (*res < min || *res > max) {
> + *res = *res < min ? min : max;
> + invalid_flag |= 1 << bt_num_table[i].invalid_flag;
> + }
> + }
> +
> + adjust_eqc_bt_num(caps, &invalid_flag);
> +
> + return invalid_flag;
> +}
> +
> static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
> {
> struct hns_roce_cmq_desc desc[2];
> @@ -1730,11 +1821,19 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
> }
>
> ret = load_pf_timer_res_caps(hr_dev);
> - if (ret)
> + if (ret) {
> dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
> ret);
> + return ret;
> + }
>
> - return ret;
> + ret = adjust_res_caps(hr_dev);
> + if (ret)
> + dev_warn(dev,
> + "invalid resource values have been adjusted, invalid_flag = 0x%x.\n",
> + ret);
> +
> + return 0;
> }
>
> static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
> @@ -1743,10 +1842,18 @@ static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
> int ret;
>
> ret = load_func_res_caps(hr_dev, true);
> - if (ret)
> + if (ret) {
> dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret);
> + return ret;
> + }
>
> - return ret;
> + ret = adjust_res_caps(hr_dev);
> + if (ret)
> + dev_warn(dev,
> + "invalid resource values have been adjusted, invalid_flag = 0x%x.\n",
> + ret);
> +
> + return 0;
> }
>
> static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index d9693f6cc802..c2d46383c88c 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -972,6 +972,43 @@ struct hns_roce_func_clear {
> #define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0)
> #define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16)
>
> +enum hns_roce_res_invalid_flag {
> + QPC_BT_NUM_INVALID_FLAG,
> + SRQC_BT_NUM_INVALID_FLAG,
> + CQC_BT_NUM_INVALID_FLAG,
> + MPT_BT_NUM_INVALID_FLAG,
> + EQC_BT_NUM_INVALID_FLAG,
> + SMAC_BT_NUM_INVALID_FLAG,
> + SGID_BT_NUM_INVALID_FLAG,
> + QID_NUM_INVALID_FLAG,
> + SCCC_BT_NUM_INVALID_FLAG,
> + GMV_BT_NUM_INVALID_FLAG,
> + QPC_TIMER_BT_NUM_INVALID_FLAG,
> + CQC_TIMER_BT_NUM_INVALID_FLAG,
> +};
> +
> +enum hns_roce_res_revision {
> + RES_FOR_HIP08,
> + RES_FOR_HIP09,
> + RES_FOR_ALL,
> +};
> +
> +#define RES_OFFSET_IN_CAPS(res) \
> + (offsetof(struct hns_roce_caps, res))
> +
> +#define MAX_QPC_BT_NUM 2048
> +#define MAX_SRQC_BT_NUM 512
> +#define MAX_CQC_BT_NUM 512
> +#define MAX_MPT_BT_NUM 512
> +#define MAX_EQC_BT_NUM 512
> +#define MAX_SMAC_BT_NUM 256
> +#define MAX_SGID_BT_NUM 256
> +#define MAX_SL_NUM 8
> +#define MAX_SCCC_BT_NUM 512
> +#define MAX_GMV_BT_NUM 256
> +#define MAX_QPC_TIMER_BT_NUM 1728
> +#define MAX_CQC_TIMER_BT_NUM 1600
> +
> /*
> * Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES
> * and HNS_ROCE_OPC_ALLOC_VF_RES
> --
> 2.30.0
>
Hi Junxian,
kernel test robot noticed the following build warnings:
[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.5-rc2 next-20230718]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Junxian-Huang/RDMA-hns-support-get-xrcd-num-from-firmware/20230718-171525
base: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link: https://lore.kernel.org/r/20230717060340.453850-4-huangjunxian6%40hisilicon.com
patch subject: [PATCH v2 for-rc 3/3] RDMA/hns: Add check and adjust for function resource values
config: arm64-allyesconfig (https://download.01.org/0day-ci/archive/20230719/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230719/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All warnings (new ones prefixed by >>):
drivers/infiniband/hw/hns/hns_roce_hw_v2.c: In function 'adjust_res_caps':
>> drivers/infiniband/hw/hns/hns_roce_hw_v2.c:1709:53: warning: passing argument 2 of 'check_res_is_supported' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
1709 | if (!check_res_is_supported(hr_dev, &bt_num_table[i]))
| ^~~~~~~~~~~~~~~~
drivers/infiniband/hw/hns/hns_roce_hw_v2.c:1673:60: note: expected 'struct hns_roce_bt_num *' but argument is of type 'const struct hns_roce_bt_num *'
1673 | struct hns_roce_bt_num *bt_num_entry)
| ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~
vim +1709 drivers/infiniband/hw/hns/hns_roce_hw_v2.c
1699
1700 static u16 adjust_res_caps(struct hns_roce_dev *hr_dev)
1701 {
1702 struct hns_roce_caps *caps = &hr_dev->caps;
1703 u16 invalid_flag = 0;
1704 u32 min, max;
1705 u32 *res;
1706 int i;
1707
1708 for (i = 0; i < ARRAY_SIZE(bt_num_table); i++) {
> 1709 if (!check_res_is_supported(hr_dev, &bt_num_table[i]))
1710 continue;
1711
1712 res = (u32 *)((void *)caps + bt_num_table[i].res_offset);
1713 min = bt_num_table[i].min;
1714 max = bt_num_table[i].max;
1715 if (*res < min || *res > max) {
1716 *res = *res < min ? min : max;
1717 invalid_flag |= 1 << bt_num_table[i].invalid_flag;
1718 }
1719 }
1720
1721 adjust_eqc_bt_num(caps, &invalid_flag);
1722
1723 return invalid_flag;
1724 }
1725
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki