2007-09-13 16:14:25

by Joachim Fenkes

[permalink] [raw]
Subject: [PATCH 0/3] IB/ehca: MR/MW fixes

This patchset replaces Nam's previous MR/MW patch (posted by me). I split
the #define fixes into a separate patch and moved the "is the memory from
hugetlbfs?" code into ib_umem_get().

[1/3] fixes the page size HW cap defines
[2/3] adds the hugetlb test to ib_umem_get()
[3/3] finally uses the hugetlb flag in ehca_reg_user_mr()

The patches should apply cleanly, in order, on top of my previous 12-patch
set. Please review the changes and apply the patches for 2.6.24 if they are
okay.

Regards,
Joachim

--
Joachim Fenkes ?-- ?eHCA Linux Driver Developer and Hardware Tamer
IBM Deutschland Entwicklung GmbH ?-- ?Dept. 3627 (I/O Firmware Dev. 2)
Schoenaicher Strasse 220 ?-- ?71032 Boeblingen ?-- ?Germany
eMail: [email protected]




2007-09-13 16:17:41

by Joachim Fenkes

[permalink] [raw]
Subject: [PATCH 1/3] IB/ehca: Fix large page HW cap defines

From: Hoang-Nam Nguyen <[email protected]>

Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_classes.h | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 206d4eb..c2edd4c 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -99,10 +99,10 @@ struct ehca_sport {
struct ehca_sma_attr saved_attr;
};

-#define HCA_CAP_MR_PGSIZE_4K 1
-#define HCA_CAP_MR_PGSIZE_64K 2
-#define HCA_CAP_MR_PGSIZE_1M 4
-#define HCA_CAP_MR_PGSIZE_16M 8
+#define HCA_CAP_MR_PGSIZE_4K 0x80000000
+#define HCA_CAP_MR_PGSIZE_64K 0x40000000
+#define HCA_CAP_MR_PGSIZE_1M 0x20000000
+#define HCA_CAP_MR_PGSIZE_16M 0x10000000

struct ehca_shca {
struct ib_device ib_device;
--
1.5.2


2007-09-13 16:17:57

by Joachim Fenkes

[permalink] [raw]
Subject: [PATCH 2/3] IB/umem: Add hugetlb flag to struct ib_umem

During ib_umem_get(), determine whether all pages from the memory region are
hugetlb pages and report this in the "hugetlb" field. Low-level driver can
use this information if they need it.

Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/core/umem.c | 20 +++++++++++++++++++-
include/rdma/ib_umem.h | 1 +
2 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 664d2fa..2f54e29 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -37,6 +37,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
+#include <linux/hugetlb.h>

#include "uverbs.h"

@@ -75,6 +76,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
{
struct ib_umem *umem;
struct page **page_list;
+ struct vm_area_struct **vma_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
@@ -104,6 +106,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);

+ /* We assume the memory is from hugetlb until proved otherwise */
+ umem->hugetlb = 1;
+
INIT_LIST_HEAD(&umem->chunk_list);

page_list = (struct page **) __get_free_page(GFP_KERNEL);
@@ -112,6 +117,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
return ERR_PTR(-ENOMEM);
}

+ /*
+ * if we can't alloc the vma_list, it's not so bad;
+ * just assume the memory is not hugetlb memory
+ */
+ vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
+ if (!vma_list)
+ umem->hugetlb = 0;
+
npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;

down_write(&current->mm->mmap_sem);
@@ -131,7 +144,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
ret = get_user_pages(current, current->mm, cur_base,
min_t(int, npages,
PAGE_SIZE / sizeof (struct page *)),
- 1, !umem->writable, page_list, NULL);
+ 1, !umem->writable, page_list, vma_list);

if (ret < 0)
goto out;
@@ -152,6 +165,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,

chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
for (i = 0; i < chunk->nents; ++i) {
+ if (vma_list &&
+ !is_vm_hugetlb_page(vma_list[i + off]))
+ umem->hugetlb = 0;
chunk->page_list[i].page = page_list[i + off];
chunk->page_list[i].offset = 0;
chunk->page_list[i].length = PAGE_SIZE;
@@ -186,6 +202,8 @@ out:
current->mm->locked_vm = locked;

up_write(&current->mm->mmap_sem);
+ if (vma_list)
+ free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);

return ret < 0 ? ERR_PTR(ret) : umem;
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index c533d6c..2229842 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -45,6 +45,7 @@ struct ib_umem {
int offset;
int page_size;
int writable;
+ int hugetlb;
struct list_head chunk_list;
struct work_struct work;
struct mm_struct *mm;
--
1.5.2


2007-09-13 16:18:25

by Joachim Fenkes

[permalink] [raw]
Subject: [PATCH 3/3] IB/ehca: Make sure user pages are from hugetlb before using MR large pages

...because, on virtualized hardware like System p, we can't be sure that the
physical pages behind them are contiguous.

Signed-off-by: Joachim Fenkes <[email protected]>
---
drivers/infiniband/hw/ehca/ehca_mrmw.c | 25 +++++++++++++++----------
1 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 4c8f3b3..4ba8b7c 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -51,6 +51,7 @@

#define NUM_CHUNKS(length, chunk_size) \
(((length) + (chunk_size - 1)) / (chunk_size))
+
/* max number of rpages (per hcall register_rpages) */
#define MAX_RPAGES 512

@@ -64,6 +65,11 @@ enum ehca_mr_pgsize {
EHCA_MR_PGSIZE16M = 0x1000000L
};

+#define EHCA_MR_PGSHIFT4K 12
+#define EHCA_MR_PGSHIFT64K 16
+#define EHCA_MR_PGSHIFT1M 20
+#define EHCA_MR_PGSHIFT16M 24
+
static u32 ehca_encode_hwpage_size(u32 pgsize)
{
u32 idx = 0;
@@ -347,17 +353,16 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
/* select proper hw_pgsize */
if (ehca_mr_largepage &&
(shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
- if (length <= EHCA_MR_PGSIZE4K
- && PAGE_SIZE == EHCA_MR_PGSIZE4K)
- hwpage_size = EHCA_MR_PGSIZE4K;
- else if (length <= EHCA_MR_PGSIZE64K)
- hwpage_size = EHCA_MR_PGSIZE64K;
- else if (length <= EHCA_MR_PGSIZE1M)
- hwpage_size = EHCA_MR_PGSIZE1M;
- else
- hwpage_size = EHCA_MR_PGSIZE16M;
+ int page_shift = PAGE_SHIFT;
+ if (e_mr->umem->hugetlb) {
+ /* determine page_shift, clamp between 4K and 16M */
+ page_shift = (fls64(length - 1) + 3) & ~3;
+ page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+ EHCA_MR_PGSHIFT16M);
+ }
+ hwpage_size = 1UL << page_shift;
} else
- hwpage_size = EHCA_MR_PGSIZE4K;
+ hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);

reg_user_mr_fallback:
--
1.5.2


2007-09-17 22:12:39

by Roland Dreier

[permalink] [raw]

2007-09-19 17:16:41

by Roland Dreier

[permalink] [raw]
Subject: Re: [PATCH 2/3] IB/umem: Add hugetlb flag to struct ib_umem

This looks realy nice to me... a very clean patch.

I'll add this to 2.6.24 unless someone objects soon...

2007-09-20 16:29:29

by Roland Dreier

[permalink] [raw]