Here are some more fixes for the eHCA driver, fixing some problems we found
during internal system test.
[1/5] fixes the QP pointer determination for SRQ base QPs
[2/5] fixes a masking error in {,re}reg_phys_mr()
[3/5] fixes a bug in alloc_fmr() and simplifies some code
[4/5] refactors hca_cap_mr_pgsize and fixes a problem with ib_srp
[5/5] enables large page MRs by default
I built the patches on top of Roland's for-2.6.24 git branch. Please review
and queue them for 2.6.24-rc1 if you're okay with them. Thanks!
Cheers,
Joachim
--
Joachim Fenkes -- eHCA Linux Driver Developer and Hardware Tamer
IBM Deutschland Entwicklung GmbH -- Dept. 3627 (I/O Firmware Dev. 2)
Schoenaicher Strasse 220 -- 71032 Boeblingen -- Germany
eMail: [email protected]
Because hardware reports the SRQ token in RWQEs of SRQ base QPs, supply the
base QP token as SRQ token, so we can properly find the SRQ base QP.
Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_qp.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index e2bd62b..de18264 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -451,7 +451,6 @@ static struct ehca_qp *internal_create_qp(
has_srq = 1;
parms.ext_type = EQPT_SRQBASE;
parms.srq_qpn = my_srq->real_qp_num;
- parms.srq_token = my_srq->token;
}
if (is_llqp && has_srq) {
@@ -583,6 +582,9 @@ static struct ehca_qp *internal_create_qp(
goto create_qp_exit1;
}
+ if (has_srq)
+ parms.srq_token = my_qp->token;
+
parms.servicetype = ibqptype2servicetype(qp_type);
if (parms.servicetype < 0) {
ret = -EINVAL;
--
1.5.2
Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_mrmw.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738..16c9efd 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -259,7 +259,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
pginfo.u.phy.num_phys_buf = num_phys_buf;
pginfo.u.phy.phys_buf_array = phys_buf_array;
pginfo.next_hwpage =
- ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -547,7 +547,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
pginfo.u.phy.num_phys_buf = num_phys_buf;
pginfo.u.phy.phys_buf_array = phys_buf_array;
pginfo.next_hwpage =
- ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
}
if (mr_rereg_mask & IB_MR_REREG_ACCESS)
new_acl = mr_access_flags;
--
1.5.2
Simplify ehca_encode_hwpage_size(), fixing an infinite loop for pgsize == 0
in the process. Fix the bug in alloc_fmr() that triggered the loop.
Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_mrmw.c | 15 ++++-----------
1 files changed, 4 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 16c9efd..b9a788c 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -72,17 +72,9 @@ enum ehca_mr_pgsize {
static u32 ehca_encode_hwpage_size(u32 pgsize)
{
- u32 idx = 0;
- pgsize >>= 12;
- /*
- * map mr page size into hw code:
- * 0, 1, 2, 3 for 4K, 64K, 1M, 64M
- */
- while (!(pgsize & 1)) {
- idx++;
- pgsize >>= 4;
- }
- return idx;
+ int log = ilog2(pgsize);
+ WARN_ON(log < 12 || log > 24 || log & 3);
+ return (log - 12) / 4;
}
static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
@@ -826,6 +818,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
/* register MR on HCA */
memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.hwpage_size = hw_pgsize;
/*
* pginfo.num_hwpages==0, ie register_rpages() will not be called
* but deferred to map_phys_fmr()
--
1.5.2
ehca_shca.hca_cap_mr_pgsize now contains all supported page sizes ORed
together. This makes some checks easier to code and understand, plus we can
return this value verbatim as page_size_cap in ehca_query_device(), fixing a
problem with SRP (reported by Anton Blanchard -- thanks!).
Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_classes.h | 1 -
drivers/infiniband/hw/ehca/ehca_hca.c | 1 +
drivers/infiniband/hw/ehca/ehca_main.c | 18 ++++++++++++-
drivers/infiniband/hw/ehca/ehca_mrmw.c | 38 ++++++++++++++--------------
4 files changed, 36 insertions(+), 22 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 0f7a55d..365bc5d 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -323,7 +323,6 @@ extern int ehca_static_rate;
extern int ehca_port_act_time;
extern int ehca_use_hp_mr;
extern int ehca_scaling_code;
-extern int ehca_mr_largepage;
struct ipzu_queue_resp {
u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 4aa3ffa..15806d1 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -77,6 +77,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
memset(props, 0, sizeof(struct ib_device_attr));
+ props->page_size_cap = shca->hca_cap_mr_pgsize;
props->fw_ver = rblock->hw_ver;
props->max_mr_size = rblock->max_mr_size;
props->vendor_id = rblock->vendor_id >> 8;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 403467f..d477dc3 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -260,13 +260,20 @@ static struct cap_descr {
{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
};
-int ehca_sense_attributes(struct ehca_shca *shca)
+static int ehca_sense_attributes(struct ehca_shca *shca)
{
int i, ret = 0;
u64 h_ret;
struct hipz_query_hca *rblock;
struct hipz_query_port *port;
+ static const u32 pgsize_map[] = {
+ HCA_CAP_MR_PGSIZE_4K, 0x1000,
+ HCA_CAP_MR_PGSIZE_64K, 0x10000,
+ HCA_CAP_MR_PGSIZE_1M, 0x100000,
+ HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+ };
+
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_gen_err("Cannot allocate rblock memory.");
@@ -329,8 +336,15 @@ int ehca_sense_attributes(struct ehca_shca *shca)
if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
- shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+ /* translate supported MR page sizes; always support 4K */
+ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+ if (ehca_mr_largepage) { /* support extra sizes only if enabled */
+ for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+ if (rblock->memory_page_size_supported & pgsize_map[i])
+ shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+ }
+ /* query max MTU from first port -- it's the same for all ports */
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
if (h_ret != H_SUCCESS) {
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index b9a788c..bb97915 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -79,9 +79,7 @@ static u32 ehca_encode_hwpage_size(u32 pgsize)
static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
{
- if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)
- return EHCA_MR_PGSIZE16M;
- return EHCA_MR_PGSIZE4K;
+ return 1UL << ilog2(shca->hca_cap_mr_pgsize);
}
static struct ehca_mr *ehca_mr_new(void)
@@ -288,7 +286,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
container_of(pd->device, struct ehca_shca, ib_device);
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_mr_pginfo pginfo;
- int ret;
+ int ret, page_shift;
u32 num_kpages;
u32 num_hwpages;
u64 hwpage_size;
@@ -343,19 +341,20 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
/* determine number of MR pages */
num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
/* select proper hw_pgsize */
- if (ehca_mr_largepage &&
- (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
- int page_shift = PAGE_SHIFT;
- if (e_mr->umem->hugetlb) {
- /* determine page_shift, clamp between 4K and 16M */
- page_shift = (fls64(length - 1) + 3) & ~3;
- page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
- EHCA_MR_PGSHIFT16M);
- }
- hwpage_size = 1UL << page_shift;
- } else
- hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
- ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
+ page_shift = PAGE_SHIFT;
+ if (e_mr->umem->hugetlb) {
+ /* determine page_shift, clamp between 4K and 16M */
+ page_shift = (fls64(length - 1) + 3) & ~3;
+ page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+ EHCA_MR_PGSHIFT16M);
+ }
+ hwpage_size = 1UL << page_shift;
+
+ /* now that we have the desired page size, shift until it's
+ * supported, too. 4K is always supported, so this terminates.
+ */
+ while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+ hwpage_size >>= 4;
reg_user_mr_fallback:
num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
@@ -801,8 +800,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
ib_fmr = ERR_PTR(-EINVAL);
goto alloc_fmr_exit0;
}
- hw_pgsize = ehca_get_max_hwpage_size(shca);
- if ((1 << fmr_attr->page_shift) != hw_pgsize) {
+
+ hw_pgsize = 1 << fmr_attr->page_shift;
+ if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
fmr_attr->page_shift);
ib_fmr = ERR_PTR(-EINVAL);
--
1.5.2
Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_main.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index d477dc3..2f51c13 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -65,7 +65,7 @@ int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
int ehca_scaling_code = 0;
-int ehca_mr_largepage = 0;
+int ehca_mr_largepage = 1;
module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO);
module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
--
1.5.2