2018-05-26 08:12:55

by Wei Hu (Xavier)

Subject: [PATCH V3 rdma-next 0/4] {RDMA/uverbs,hns}: Misc update for hns driver

Hi, Jason and Doug

This patchset includes a bug fix, the reset process, and the
implementation of the disassociate_ucontext API for the hns
driver.

We refactored the V1 implementation of disassociate_ucontext
according to Jason's comment. The related
link: https://lkml.org/lkml/2018/5/22/967
Thanks.

Regards
Wei Hu

Wei Hu (Xavier) (4):
RDMA/hns: Add reset process for RoCE in hip08
RDMA/hns: Fix the illegal memory operation when cross page
RDMA/uverbs: Hoist the common process of disassociate_ucontext into ib
core
RDMA/hns: Implement the disassociate_ucontext API

drivers/infiniband/core/uverbs_main.c | 45 ++++++++-
drivers/infiniband/hw/hns/hns_roce_cmd.c | 3 +
drivers/infiniband/hw/hns/hns_roce_device.h | 10 ++
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 148 ++++++++++++++++++++++++----
drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 +
drivers/infiniband/hw/hns/hns_roce_main.c | 77 ++++++++++++++-
drivers/infiniband/hw/mlx4/main.c | 34 -------
drivers/infiniband/hw/mlx5/main.c | 34 -------
8 files changed, 262 insertions(+), 90 deletions(-)

--
1.9.1



2018-05-26 08:10:09

by Wei Hu (Xavier)

Subject: [PATCH V3 rdma-next 4/4] RDMA/hns: Implement the disassociate_ucontext API

This patch implements the IB core disassociate_ucontext API.
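
A hedged sketch of what the zap step amounts to for one tracked mapping;
the function name below is hypothetical and the real code is in the diff
further down. The mappings handed out by hns_roce_mmap() are single
pages, hence the PAGE_SIZE zap:

#include <linux/mm.h>

/* Illustrative only (hypothetical name): once a mapping is zapped, a
 * later userspace access faults instead of reaching hardware that may
 * be gone or held in reset.
 */
static void example_zap_one_mapping(struct vm_area_struct *vma)
{
	zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
	vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
	vma->vm_ops = NULL;	/* the owning ucontext is going away */
}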

Signed-off-by: Wei Hu (Xavier) <[email protected]>

---
v2->v3: Addressed the comments from Jason. The related link:
https://lkml.org/lkml/2018/5/22/967
v1->v2: no change.
---
drivers/infiniband/hw/hns/hns_roce_device.h | 8 ++++
drivers/infiniband/hw/hns/hns_roce_main.c | 70 ++++++++++++++++++++++++++++-
2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index da8512b..31221d5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -217,11 +217,19 @@ struct hns_roce_uar {
unsigned long logic_idx;
};

+struct hns_roce_vma_data {
+ struct list_head list;
+ struct vm_area_struct *vma;
+ struct mutex *vma_list_mutex;
+};
+
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
struct list_head page_list;
struct mutex page_mutex;
+ struct list_head vma_list;
+ struct mutex vma_list_mutex;
};

struct hns_roce_pd {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index ac51372..42296f0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -345,6 +345,8 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_uar_alloc;

+ INIT_LIST_HEAD(&context->vma_list);
+ mutex_init(&context->vma_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex);
@@ -375,6 +377,50 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}

+static void hns_roce_vma_open(struct vm_area_struct *vma)
+{
+ vma->vm_ops = NULL;
+}
+
+static void hns_roce_vma_close(struct vm_area_struct *vma)
+{
+ struct hns_roce_vma_data *vma_data;
+
+ vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
+ vma_data->vma = NULL;
+ mutex_lock(vma_data->vma_list_mutex);
+ list_del(&vma_data->list);
+ mutex_unlock(vma_data->vma_list_mutex);
+ kfree(vma_data);
+}
+
+static const struct vm_operations_struct hns_roce_vm_ops = {
+ .open = hns_roce_vma_open,
+ .close = hns_roce_vma_close,
+};
+
+static int hns_roce_set_vma_data(struct vm_area_struct *vma,
+ struct hns_roce_ucontext *context)
+{
+ struct list_head *vma_head = &context->vma_list;
+ struct hns_roce_vma_data *vma_data;
+
+ vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
+ if (!vma_data)
+ return -ENOMEM;
+
+ vma_data->vma = vma;
+ vma_data->vma_list_mutex = &context->vma_list_mutex;
+ vma->vm_private_data = vma_data;
+ vma->vm_ops = &hns_roce_vm_ops;
+
+ mutex_lock(&context->vma_list_mutex);
+ list_add(&vma_data->list, vma_head);
+ mutex_unlock(&context->vma_list_mutex);
+
+ return 0;
+}
+
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
@@ -400,7 +446,7 @@ static int hns_roce_mmap(struct ib_ucontext *context,
} else
return -EINVAL;

- return 0;
+ return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
}

static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -424,6 +470,27 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
return 0;
}

+static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+ struct hns_roce_vma_data *vma_data, *n;
+ struct vm_area_struct *vma;
+ int ret;
+
+ mutex_lock(&context->vma_list_mutex);
+ list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
+ vma = vma_data->vma;
+ ret = zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
+ WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+
+ vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
+ vma->vm_ops = NULL;
+ list_del(&vma_data->list);
+ kfree(vma_data);
+ }
+ mutex_unlock(&context->vma_list_mutex);
+}
+
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
@@ -519,6 +586,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)

/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
+ ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;

ib_dev->driver_id = RDMA_DRIVER_HNS;
ret = ib_register_device(ib_dev, NULL);
--
1.9.1


2018-05-26 08:10:33

by Wei Hu (Xavier)

Subject: [PATCH V3 rdma-next 3/4] RDMA/uverbs: Hoist the common process of disassociate_ucontext into ib core

This patch hoists the common processing of the disassociate_ucontext
callback into the ib core code, since this code is common to every
ib_device driver.
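
The resulting contract for drivers: by the time the driver callback
runs, the core has resolved the owning task and mm (or waited for the
task to die) and holds mmap_sem for write. A hedged sketch of what a
driver-side callback reduces to under that contract; all exampledrv_*
names and fields are hypothetical:

#include <linux/mm.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/* Hypothetical per-context vma tracking, analogous to what mlx4/mlx5
 * (and hns in patch 4/4) keep for their doorbell/BAR mappings.
 */
struct exampledrv_vma_data {
	struct list_head list;
	struct vm_area_struct *vma;
};

struct exampledrv_ucontext {
	struct ib_ucontext ibucontext;
	struct list_head vma_list;
	struct mutex vma_list_mutex;
};

static void exampledrv_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	struct exampledrv_ucontext *ctx =
		container_of(ibcontext, struct exampledrv_ucontext, ibucontext);
	struct exampledrv_vma_data *vd, *n;

	/* mmap_sem of the owning mm is already held for write by the core. */
	mutex_lock(&ctx->vma_list_mutex);
	list_for_each_entry_safe(vd, n, &ctx->vma_list, list) {
		zap_vma_ptes(vd->vma, vd->vma->vm_start,
			     vd->vma->vm_end - vd->vma->vm_start);
		vd->vma->vm_ops = NULL;
		list_del(&vd->list);
		kfree(vd);
	}
	mutex_unlock(&ctx->vma_list_mutex);
}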

Signed-off-by: Wei Hu (Xavier) <[email protected]>
---
drivers/infiniband/core/uverbs_main.c | 45 ++++++++++++++++++++++++++++++++++-
drivers/infiniband/hw/mlx4/main.c | 34 --------------------------
drivers/infiniband/hw/mlx5/main.c | 34 --------------------------
3 files changed, 44 insertions(+), 69 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 4445d8e..a0a1c70 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -41,6 +41,8 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
@@ -1090,6 +1092,47 @@ static void ib_uverbs_add_one(struct ib_device *device)
return;
}

+static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct ib_device *ib_dev = ibcontext->device;
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ put_task_struct(owning_process);
+ usleep_range(1000, 2000);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the
+ * task struct.
+ */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
+
+ /* need to protect from a race on closing the vma as part of
+ * mlx5_ib_vma_close.
+ */
+ down_write(&owning_mm->mmap_sem);
+ ib_dev->disassociate_ucontext(ibcontext);
+ up_write(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
+}
+
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
@@ -1130,7 +1173,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_uverbs_event_handler(&file->event_handler, &event);
- ib_dev->disassociate_ucontext(ucontext);
+ ib_uverbs_disassociate_ucontext(ucontext);
mutex_lock(&file->cleanup_mutex);
ib_uverbs_cleanup_ucontext(file, ucontext, true);
mutex_unlock(&file->cleanup_mutex);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bf12394..59aed45 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1189,40 +1189,10 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
int ret = 0;
struct vm_area_struct *vma;
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- /* make sure that task is dead before returning, it may
- * prevent a rare case of module down in parallel to a
- * call to mlx4_ib_vma_close.
- */
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the task struct */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }

/* need to protect from a race on closing the vma as part of
* mlx4_ib_vma_close().
*/
- down_write(&owning_mm->mmap_sem);
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
@@ -1241,10 +1211,6 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
-
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
}

static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f3e7d7c..136d64f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1965,38 +1965,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
struct vm_area_struct *vma;
struct mlx5_ib_vma_private_data *vma_private, *n;
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;

- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the
- * task struct.
- */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- /* need to protect from a race on closing the vma as part of
- * mlx5_ib_vma_close.
- */
- down_write(&owning_mm->mmap_sem);
mutex_lock(&context->vma_private_list_mutex);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
@@ -2013,9 +1982,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
kfree(vma_private);
}
mutex_unlock(&context->vma_private_list_mutex);
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
}

static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
--
1.9.1


2018-05-26 08:11:24

by Wei Hu (Xavier)

Subject: [PATCH V3 rdma-next 2/4] RDMA/hns: Fix the illegal memory operation when cross page

This patch fixes a potential illegal memory operation when the
extended SGE buffer crosses a page boundary in the post send
operation. The bug causes the calltrace below.

[ 3302.922107] Unable to handle kernel paging request at virtual address ffff00003b3a0004
[ 3302.930009] Mem abort info:
[ 3302.932790] Exception class = DABT (current EL), IL = 32 bits
[ 3302.938695] SET = 0, FnV = 0
[ 3302.941735] EA = 0, S1PTW = 0
[ 3302.944863] Data abort info:
[ 3302.947729] ISV = 0, ISS = 0x00000047
[ 3302.951551] CM = 0, WnR = 1
[ 3302.954506] swapper pgtable: 4k pages, 48-bit VAs, pgd = ffff000009ea5000
[ 3302.961279] [ffff00003b3a0004] *pgd=00000023dfffe003, *pud=00000023dfffd003, *pmd=00000022dc84c003, *pte=0000000000000000
[ 3302.972224] Internal error: Oops: 96000047 [#1] SMP
[ 3302.999509] CPU: 9 PID: 19628 Comm: roce_test_main Tainted: G OE 4.14.10 #1
[ 3303.007498] task: ffff80234df78000 task.stack: ffff00000f640000
[ 3303.013412] PC is at hns_roce_v2_post_send+0x690/0xe20 [hns_roce_pci]
[ 3303.019843] LR is at hns_roce_v2_post_send+0x658/0xe20 [hns_roce_pci]
[ 3303.026269] pc : [<ffff0000020694f8>] lr : [<ffff0000020694c0>] pstate: 804001c9
[ 3303.033649] sp : ffff00000f643870
[ 3303.036951] x29: ffff00000f643870 x28: ffff80232bfa9c00
[ 3303.042250] x27: ffff80234d909380 x26: ffff00003b37f0c0
[ 3303.047549] x25: 0000000000000000 x24: 0000000000000003
[ 3303.052848] x23: 0000000000000000 x22: 0000000000000000
[ 3303.058148] x21: 0000000000000101 x20: 0000000000000001
[ 3303.063447] x19: ffff80236163f800 x18: 0000000000000000
[ 3303.068746] x17: 0000ffff86b76fc8 x16: ffff000008301600
[ 3303.074045] x15: 000020a51c000000 x14: 3128726464615f65
[ 3303.079344] x13: 746f6d6572202c29 x12: 303035312879656b
[ 3303.084643] x11: 723a6f666e692072 x10: 573a6f666e693a5d
[ 3303.089943] x9 : 0000000000000004 x8 : ffff8023ce38b000
[ 3303.095242] x7 : ffff8023ce38b320 x6 : 0000000000000418
[ 3303.100541] x5 : ffff80232bfa9cc8 x4 : 0000000000000030
[ 3303.105839] x3 : 0000000000000100 x2 : 0000000000000200
[ 3303.111138] x1 : 0000000000000320 x0 : ffff00003b3a0000
[ 3303.116438] Process roce_test_main (pid: 19628, stack limit = 0xffff00000f640000)
[ 3303.123906] Call trace:
[ 3303.126339] Exception stack(0xffff00000f643730 to 0xffff00000f643870)
[ 3303.215790] [<ffff0000020694f8>] hns_roce_v2_post_send+0x690/0xe20 [hns_roce_pci]
[ 3303.223293] [<ffff0000021c3750>] rt_ktest_post_send+0x5d0/0x8b8 [rdma_test]
[ 3303.230261] [<ffff0000021b3234>] exec_send_cmd+0x664/0x1350 [rdma_test]
[ 3303.236881] [<ffff0000021b8b30>] rt_ktest_dispatch_cmd_3+0x1510/0x3790 [rdma_test]
[ 3303.244455] [<ffff0000021bae54>] rt_ktest_dispatch_cmd_2+0xa4/0x118 [rdma_test]
[ 3303.251770] [<ffff0000021bafec>] rt_ktest_dispatch_cmd+0x124/0xaa8 [rdma_test]
[ 3303.258997] [<ffff0000021bbc3c>] rt_ktest_dev_write+0x2cc/0x568 [rdma_test]
[ 3303.265947] [<ffff0000082ad688>] __vfs_write+0x60/0x18c
[ 3303.271158] [<ffff0000082ad998>] vfs_write+0xa8/0x198
[ 3303.276196] [<ffff0000082adc7c>] SyS_write+0x6c/0xd4
[ 3303.281147] Exception stack(0xffff00000f643ec0 to 0xffff00000f644000)
[ 3303.287573] 3ec0: 0000000000000003 0000fffffc85faa8 0000000000004e60 0000000000000000
[ 3303.295388] 3ee0: 0000000021fb2000 000000000000ffff eff0e3efe4e58080 0000fffffcc724fe
[ 3303.303204] 3f00: 0000000000000040 1999999999999999 0101010101010101 0000000000000038
[ 3303.311019] 3f20: 0000000000000005 ffffffffffffffff 0d73757461747320 ffffffffffffffff
[ 3303.318835] 3f40: 0000000000000000 0000000000459b00 0000fffffc85e360 000000000043d788
[ 3303.326650] 3f60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.334465] 3f80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.342281] 3fa0: 0000000000000000 0000fffffc85e570 0000000000438804 0000fffffc85e570
[ 3303.350096] 3fc0: 0000ffff8553f618 0000000080000000 0000000000000003 0000000000000040
[ 3303.357911] 3fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.365729] [<ffff000008083808>] __sys_trace_return+0x0/0x4
[ 3303.371288] Code: b94008e9 34000129 b9400ce2 110006b5 (b9000402)
[ 3303.377377] ---[ end trace fd5ab98b3325cf9a ]---
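
The underlying problem is that the extended SGE area lives in a paged
buffer, so descriptors are not virtually contiguous across a page
boundary, while the old code simply kept incrementing one pointer (note
the faulting address above ends in 0x004, just past a 4 KB boundary). A
hedged, illustrative sketch of the split computation the fix performs;
the function name is hypothetical, and 4 KB pages with 16-byte data
segments are assumed:

#include <linux/kernel.h>	/* round_up() */
#include <linux/types.h>

/*
 * Illustrative only; the real computation is in set_extend_sge() in the
 * diff below.  With 4 KB buffer pages and 16-byte WQE data segments, a
 * descriptor pointer 16 bytes short of a page boundary means only one
 * more extended SGE fits in the current page (fi_sge_num == 1); the
 * remaining SGEs must be written at the start of the next page, whose
 * kernel VA has to be looked up again via get_send_extend_sge().
 */
static unsigned int ext_sges_left_in_page(uintptr_t dseg_va,
					  unsigned int page_shift,
					  unsigned int seg_size)
{
	return (round_up(dseg_va, 1UL << page_shift) - dseg_va) / seg_size;
}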

Reported-by: Jie Chen <[email protected]>
Reported-by: Xiping Zhang (Francis) <[email protected]>
Fixes: b1c158350968 ("RDMA/hns: Get rid of virt_to_page and vmap calls after dma_alloc_coherent")
Signed-off-by: Wei Hu (Xavier) <[email protected]>

---
v2->v3: Add calltrace info and modify the judgement condition according
to Jason's comment.
v1->v2: Modify the Fixes statement according to Leon's comment.
---
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 72 +++++++++++++++++++++---------
drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 +
2 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 368f682..46f289d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -34,6 +34,7 @@
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/types.h>
#include <net/addrconf.h>
#include <rdma/ib_umem.h>

@@ -52,6 +53,53 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}

+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
+ unsigned int *sge_ind)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct ib_sge *sg;
+ int num_in_wqe = 0;
+ int extend_sge_num;
+ int fi_sge_num;
+ int se_sge_num;
+ int shift;
+ int i;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
+ extend_sge_num = wr->num_sge - num_in_wqe;
+ sg = wr->sg_list + num_in_wqe;
+ shift = qp->hr_buf.page_shift;
+
+ /*
+ * Check whether wr->num_sge sges are in the same page. If not, we
+ * should calculate how many sges in the first page and the second
+ * page.
+ */
+ dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
+ fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
+ (uintptr_t)dseg) /
+ sizeof(struct hns_roce_v2_wqe_data_seg);
+ if (extend_sge_num > fi_sge_num) {
+ se_sge_num = extend_sge_num - fi_sge_num;
+ for (i = 0; i < fi_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + i);
+ (*sge_ind)++;
+ }
+ dseg = get_send_extend_sge(qp,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
+ for (i = 0; i < se_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + fi_sge_num + i);
+ (*sge_ind)++;
+ }
+ } else {
+ for (i = 0; i < extend_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + i);
+ (*sge_ind)++;
+ }
+ }
+}
+
static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
void *wqe, unsigned int *sge_ind,
@@ -85,7 +133,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1);
} else {
- if (wr->num_sge <= 2) {
+ if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -98,24 +146,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));

- for (i = 0; i < 2; i++) {
+ for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
dseg++;
}
}

- dseg = get_send_extend_sge(qp,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < wr->num_sge - 2; i++) {
- if (likely(wr->sg_list[i + 2].length)) {
- set_data_seg_v2(dseg,
- wr->sg_list + 2 + i);
- dseg++;
- (*sge_ind)++;
- }
- }
+ set_extend_sge(qp, wr, sge_ind);
}

roce_set_field(rc_sq_wqe->byte_16,
@@ -318,13 +356,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
GID_LEN_V2);

- dseg = get_send_extend_sge(qp,
- sge_ind & (qp->sge.sge_cnt - 1));
- for (i = 0; i < wr->num_sge; i++) {
- set_data_seg_v2(dseg + i, wr->sg_list + i);
- sge_ind++;
- }
-
+ set_extend_sge(qp, wr, &sge_ind);
ind++;
} else if (ibqp->qp_type == IB_QPT_RC) {
rc_sq_wqe = wqe;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 2caeb4c..d47675f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -77,6 +77,7 @@
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x100
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
+#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2

#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
--
1.9.1


2018-05-26 08:13:12

by Wei Hu (Xavier)

Subject: [PATCH V3 rdma-next 1/4] RDMA/hns: Add reset process for RoCE in hip08

This patch adds the reset process for RoCE in hip08.
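
The reset support hangs off the hnae3 client's reset_notify callback. A
hedged sketch of how the NIC driver is expected to invoke it around a
function reset; the caller below is hypothetical and only illustrates
the notification order handled by the switch added in this patch:

#include "hnae3.h"

/*
 * Hypothetical caller in the NIC driver; the exact reset sequencing is
 * owned by the hns3 NIC driver, not by this patch.
 */
static int example_notify_roce_reset(struct hnae3_handle *handle,
				     struct hnae3_client *client)
{
	int ret;

	/* Quiesce: sets hr_dev->is_reset so mailbox/CMQ submissions return
	 * immediately, and dispatches IB_EVENT_DEVICE_FATAL.
	 */
	ret = client->ops->reset_notify(handle, HNAE3_DOWN_CLIENT);
	if (ret)
		return ret;

	/* Tear down the old RoCE instance. */
	ret = client->ops->reset_notify(handle, HNAE3_UNINIT_CLIENT);
	if (ret)
		return ret;

	/* After the hardware reset, re-create the instance; on failure
	 * handle->priv is cleared and the error is propagated so the NIC
	 * driver knows the RoCE client is gone.
	 */
	return client->ops->reset_notify(handle, HNAE3_INIT_CLIENT);
}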

Signed-off-by: Wei Hu (Xavier) <[email protected]>

---
v2->v3: no change.
v1->v2: 1. Delete handle->priv = NULL in hns_roce_hw_v2_uninit_instance.
2. Add the hns_roce_hw_v2_reset_notify_init callback function; when
RoCE reinit fails in this function, inform the NIC driver.
The related link of Jason's comments:
https://www.spinics.net/lists/linux-rdma/msg65009.html
---
drivers/infiniband/hw/hns/hns_roce_cmd.c | 3 ++
drivers/infiniband/hw/hns/hns_roce_device.h | 2 +
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 76 +++++++++++++++++++++++++++++
drivers/infiniband/hw/hns/hns_roce_main.c | 7 +++
4 files changed, 88 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 9ebe839..a0ba19d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -176,6 +176,9 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
{
+ if (hr_dev->is_reset)
+ return 0;
+
if (hr_dev->cmd.use_events)
return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
in_modifier, op_modifier, op,
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 412297d4..da8512b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -774,6 +774,8 @@ struct hns_roce_dev {
const char *irq_names[HNS_ROCE_MAX_IRQ_NUM];
spinlock_t sm_lock;
spinlock_t bt_cmd_lock;
+ bool active;
+ bool is_reset;
struct hns_roce_ib_iboe iboe;

struct list_head pgdir_list;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a25c3da..368f682 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -768,6 +768,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
int ret = 0;
int ntc;

+ if (hr_dev->is_reset)
+ return 0;
+
spin_lock_bh(&csq->lock);

if (num > hns_roce_cmq_space(csq)) {
@@ -4790,14 +4793,87 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
{
struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;

+ if (!hr_dev)
+ return;
+
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
}

+static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct ib_event event;
+
+ if (!hr_dev) {
+ dev_err(&handle->pdev->dev,
+ "Input parameter handle->priv is NULL!\n");
+ return -EINVAL;
+ }
+
+ hr_dev->active = false;
+ hr_dev->is_reset = true;
+
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.device = &hr_dev->ib_dev;
+ event.element.port_num = 1;
+ ib_dispatch_event(&event);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
+{
+ int ret;
+
+ ret = hns_roce_hw_v2_init_instance(handle);
+ if (ret) {
+ /* when reset notify type is HNAE3_INIT_CLIENT In reset notify
+ * callback function, RoCE Engine reinitialize. If RoCE reinit
+ * failed, we should inform NIC driver.
+ */
+ handle->priv = NULL;
+ dev_err(&handle->pdev->dev,
+ "In reset process RoCE reinit failed %d.\n", ret);
+ }
+
+ return ret;
+}
+
+static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
+{
+ msleep(100);
+ hns_roce_hw_v2_uninit_instance(handle, false);
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
+ enum hnae3_reset_notify_type type)
+{
+ int ret = 0;
+
+ switch (type) {
+ case HNAE3_DOWN_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_down(handle);
+ break;
+ case HNAE3_INIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_init(handle);
+ break;
+ case HNAE3_UNINIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_uninit(handle);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
.init_instance = hns_roce_hw_v2_init_instance,
.uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .reset_notify = hns_roce_hw_v2_reset_notify,
};

static struct hnae3_client hns_roce_hw_v2_client = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1b79a38..ac51372 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -332,6 +332,9 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);

+ if (!hr_dev->active)
+ return ERR_PTR(-EAGAIN);
+
resp.qp_tab_size = hr_dev->caps.num_qps;

context = kmalloc(sizeof(*context), GFP_KERNEL);
@@ -425,6 +428,7 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;

+ hr_dev->active = false;
unregister_netdevice_notifier(&iboe->nb);
ib_unregister_device(&hr_dev->ib_dev);
}
@@ -536,6 +540,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
goto error_failed_setup_mtu_mac;
}

+ hr_dev->active = true;
return 0;

error_failed_setup_mtu_mac:
@@ -728,6 +733,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
return ret;
}
}
+ hr_dev->is_reset = false;

if (hr_dev->hw->cmq_init) {
ret = hr_dev->hw->cmq_init(hr_dev);
@@ -827,6 +833,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
void hns_roce_exit(struct hns_roce_dev *hr_dev)
{
hns_roce_unregister_device(hr_dev);
+
if (hr_dev->hw->hw_exit)
hr_dev->hw->hw_exit(hr_dev);
hns_roce_cleanup_bitmap(hr_dev);
--
1.9.1


2018-05-27 15:06:48

by Leon Romanovsky

Subject: Re: [PATCH V3 rdma-next 3/4] RDMA/uverbs: Hoist the common process of disassociate_ucontext into ib core

On Sat, May 26, 2018 at 04:41:46PM +0800, Wei Hu (Xavier) wrote:
> This patch hoisted the common process of disassociate_ucontext
> callback function into ib core code, and these code are common
> to ervery ib_device driver.
>
> Signed-off-by: Wei Hu (Xavier) <[email protected]>
> ---
> drivers/infiniband/core/uverbs_main.c | 45 ++++++++++++++++++++++++++++++++++-
> drivers/infiniband/hw/mlx4/main.c | 34 --------------------------
> drivers/infiniband/hw/mlx5/main.c | 34 --------------------------
> 3 files changed, 44 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> index 4445d8e..a0a1c70 100644
> --- a/drivers/infiniband/core/uverbs_main.c
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -41,6 +41,8 @@
> #include <linux/fs.h>
> #include <linux/poll.h>
> #include <linux/sched.h>
> +#include <linux/sched/mm.h>
> +#include <linux/sched/task.h>
> #include <linux/file.h>
> #include <linux/cdev.h>
> #include <linux/anon_inodes.h>
> @@ -1090,6 +1092,47 @@ static void ib_uverbs_add_one(struct ib_device *device)
> return;
> }
>
> +static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
> +{
> + struct ib_device *ib_dev = ibcontext->device;
> + struct task_struct *owning_process = NULL;
> + struct mm_struct *owning_mm = NULL;
> +
> + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
> + if (!owning_process)
> + return;
> +
> + owning_mm = get_task_mm(owning_process);
> + if (!owning_mm) {
> + pr_info("no mm, disassociate ucontext is pending task termination\n");
> + while (1) {
> + put_task_struct(owning_process);
> + usleep_range(1000, 2000);
> + owning_process = get_pid_task(ibcontext->tgid,
> + PIDTYPE_PID);
> + if (!owning_process ||
> + owning_process->state == TASK_DEAD) {
> + pr_info("disassociate ucontext done, task was terminated\n");
> + /* in case task was dead need to release the
> + * task struct.
> + */
> + if (owning_process)
> + put_task_struct(owning_process);
> + return;
> + }
> + }
> + }
> +
> + /* need to protect from a race on closing the vma as part of
> + * mlx5_ib_vma_close.

Except this "mlx5_ib_vma_close"

Acked-by: Leon Romanovsky <[email protected]>

> + */
> + down_write(&owning_mm->mmap_sem);
> + ib_dev->disassociate_ucontext(ibcontext);
> + up_write(&owning_mm->mmap_sem);
> + mmput(owning_mm);
> + put_task_struct(owning_process);
> +}
> +
> static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
> struct ib_device *ib_dev)
> {
> @@ -1130,7 +1173,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
> * (e.g mmput).
> */
> ib_uverbs_event_handler(&file->event_handler, &event);
> - ib_dev->disassociate_ucontext(ucontext);
> + ib_uverbs_disassociate_ucontext(ucontext);
> mutex_lock(&file->cleanup_mutex);
> ib_uverbs_cleanup_ucontext(file, ucontext, true);
> mutex_unlock(&file->cleanup_mutex);
> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
> index bf12394..59aed45 100644
> --- a/drivers/infiniband/hw/mlx4/main.c
> +++ b/drivers/infiniband/hw/mlx4/main.c
> @@ -1189,40 +1189,10 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> int ret = 0;
> struct vm_area_struct *vma;
> struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
> - struct task_struct *owning_process = NULL;
> - struct mm_struct *owning_mm = NULL;
> -
> - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
> - if (!owning_process)
> - return;
> -
> - owning_mm = get_task_mm(owning_process);
> - if (!owning_mm) {
> - pr_info("no mm, disassociate ucontext is pending task termination\n");
> - while (1) {
> - /* make sure that task is dead before returning, it may
> - * prevent a rare case of module down in parallel to a
> - * call to mlx4_ib_vma_close.
> - */
> - put_task_struct(owning_process);
> - usleep_range(1000, 2000);
> - owning_process = get_pid_task(ibcontext->tgid,
> - PIDTYPE_PID);
> - if (!owning_process ||
> - owning_process->state == TASK_DEAD) {
> - pr_info("disassociate ucontext done, task was terminated\n");
> - /* in case task was dead need to release the task struct */
> - if (owning_process)
> - put_task_struct(owning_process);
> - return;
> - }
> - }
> - }
>
> /* need to protect from a race on closing the vma as part of
> * mlx4_ib_vma_close().
> */
> - down_write(&owning_mm->mmap_sem);
> for (i = 0; i < HW_BAR_COUNT; i++) {
> vma = context->hw_bar_info[i].vma;
> if (!vma)
> @@ -1241,10 +1211,6 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> /* context going to be destroyed, should not access ops any more */
> context->hw_bar_info[i].vma->vm_ops = NULL;
> }
> -
> - up_write(&owning_mm->mmap_sem);
> - mmput(owning_mm);
> - put_task_struct(owning_process);
> }
>
> static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index f3e7d7c..136d64f 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -1965,38 +1965,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> struct vm_area_struct *vma;
> struct mlx5_ib_vma_private_data *vma_private, *n;
> struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
> - struct task_struct *owning_process = NULL;
> - struct mm_struct *owning_mm = NULL;
>
> - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
> - if (!owning_process)
> - return;
> -
> - owning_mm = get_task_mm(owning_process);
> - if (!owning_mm) {
> - pr_info("no mm, disassociate ucontext is pending task termination\n");
> - while (1) {
> - put_task_struct(owning_process);
> - usleep_range(1000, 2000);
> - owning_process = get_pid_task(ibcontext->tgid,
> - PIDTYPE_PID);
> - if (!owning_process ||
> - owning_process->state == TASK_DEAD) {
> - pr_info("disassociate ucontext done, task was terminated\n");
> - /* in case task was dead need to release the
> - * task struct.
> - */
> - if (owning_process)
> - put_task_struct(owning_process);
> - return;
> - }
> - }
> - }
> -
> - /* need to protect from a race on closing the vma as part of
> - * mlx5_ib_vma_close.
> - */
> - down_write(&owning_mm->mmap_sem);
> mutex_lock(&context->vma_private_list_mutex);
> list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
> list) {
> @@ -2013,9 +1982,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> kfree(vma_private);
> }
> mutex_unlock(&context->vma_private_list_mutex);
> - up_write(&owning_mm->mmap_sem);
> - mmput(owning_mm);
> - put_task_struct(owning_process);
> }
>
> static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
> --
> 1.9.1
>



2018-05-28 12:03:52

by Wei Hu (Xavier)

Subject: Re: [PATCH V3 rdma-next 3/4] RDMA/uverbs: Hoist the common process of disassociate_ucontext into ib core



On 2018/5/27 23:05, Leon Romanovsky wrote:
> On Sat, May 26, 2018 at 04:41:46PM +0800, Wei Hu (Xavier) wrote:
>> This patch hoisted the common process of disassociate_ucontext
>> callback function into ib core code, and these code are common
>> to ervery ib_device driver.
>>
>> Signed-off-by: Wei Hu (Xavier) <[email protected]>
>> ---
>> drivers/infiniband/core/uverbs_main.c | 45 ++++++++++++++++++++++++++++++++++-
>> drivers/infiniband/hw/mlx4/main.c | 34 --------------------------
>> drivers/infiniband/hw/mlx5/main.c | 34 --------------------------
>> 3 files changed, 44 insertions(+), 69 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
>> index 4445d8e..a0a1c70 100644
>> --- a/drivers/infiniband/core/uverbs_main.c
>> +++ b/drivers/infiniband/core/uverbs_main.c
>> @@ -41,6 +41,8 @@
>> #include <linux/fs.h>
>> #include <linux/poll.h>
>> #include <linux/sched.h>
>> +#include <linux/sched/mm.h>
>> +#include <linux/sched/task.h>
>> #include <linux/file.h>
>> #include <linux/cdev.h>
>> #include <linux/anon_inodes.h>
>> @@ -1090,6 +1092,47 @@ static void ib_uverbs_add_one(struct ib_device *device)
>> return;
>> }
>>
>> +static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
>> +{
>> + struct ib_device *ib_dev = ibcontext->device;
>> + struct task_struct *owning_process = NULL;
>> + struct mm_struct *owning_mm = NULL;
>> +
>> + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
>> + if (!owning_process)
>> + return;
>> +
>> + owning_mm = get_task_mm(owning_process);
>> + if (!owning_mm) {
>> + pr_info("no mm, disassociate ucontext is pending task termination\n");
>> + while (1) {
>> + put_task_struct(owning_process);
>> + usleep_range(1000, 2000);
>> + owning_process = get_pid_task(ibcontext->tgid,
>> + PIDTYPE_PID);
>> + if (!owning_process ||
>> + owning_process->state == TASK_DEAD) {
>> + pr_info("disassociate ucontext done, task was terminated\n");
>> + /* in case task was dead need to release the
>> + * task struct.
>> + */
>> + if (owning_process)
>> + put_task_struct(owning_process);
>> + return;
>> + }
>> + }
>> + }
>> +
>> + /* need to protect from a race on closing the vma as part of
>> + * mlx5_ib_vma_close.
> Except this "mlx5_ib_vma_close"
>
> Acked-by: Leon Romanovsky <[email protected]>
Hi, Leon
Thanks, we will fix it in V4.
Regards
Wei Hu
>> + */
>> + down_write(&owning_mm->mmap_sem);
>> + ib_dev->disassociate_ucontext(ibcontext);
>> + up_write(&owning_mm->mmap_sem);
>> + mmput(owning_mm);
>> + put_task_struct(owning_process);
>> +}
>> +
>> static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
>> struct ib_device *ib_dev)
>> {
>> @@ -1130,7 +1173,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
>> * (e.g mmput).
>> */
>> ib_uverbs_event_handler(&file->event_handler, &event);
>> - ib_dev->disassociate_ucontext(ucontext);
>> + ib_uverbs_disassociate_ucontext(ucontext);
>> mutex_lock(&file->cleanup_mutex);
>> ib_uverbs_cleanup_ucontext(file, ucontext, true);
>> mutex_unlock(&file->cleanup_mutex);
>> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
>> index bf12394..59aed45 100644
>> --- a/drivers/infiniband/hw/mlx4/main.c
>> +++ b/drivers/infiniband/hw/mlx4/main.c
>> @@ -1189,40 +1189,10 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
>> int ret = 0;
>> struct vm_area_struct *vma;
>> struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
>> - struct task_struct *owning_process = NULL;
>> - struct mm_struct *owning_mm = NULL;
>> -
>> - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
>> - if (!owning_process)
>> - return;
>> -
>> - owning_mm = get_task_mm(owning_process);
>> - if (!owning_mm) {
>> - pr_info("no mm, disassociate ucontext is pending task termination\n");
>> - while (1) {
>> - /* make sure that task is dead before returning, it may
>> - * prevent a rare case of module down in parallel to a
>> - * call to mlx4_ib_vma_close.
>> - */
>> - put_task_struct(owning_process);
>> - usleep_range(1000, 2000);
>> - owning_process = get_pid_task(ibcontext->tgid,
>> - PIDTYPE_PID);
>> - if (!owning_process ||
>> - owning_process->state == TASK_DEAD) {
>> - pr_info("disassociate ucontext done, task was terminated\n");
>> - /* in case task was dead need to release the task struct */
>> - if (owning_process)
>> - put_task_struct(owning_process);
>> - return;
>> - }
>> - }
>> - }
>>
>> /* need to protect from a race on closing the vma as part of
>> * mlx4_ib_vma_close().
>> */
>> - down_write(&owning_mm->mmap_sem);
>> for (i = 0; i < HW_BAR_COUNT; i++) {
>> vma = context->hw_bar_info[i].vma;
>> if (!vma)
>> @@ -1241,10 +1211,6 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
>> /* context going to be destroyed, should not access ops any more */
>> context->hw_bar_info[i].vma->vm_ops = NULL;
>> }
>> -
>> - up_write(&owning_mm->mmap_sem);
>> - mmput(owning_mm);
>> - put_task_struct(owning_process);
>> }
>>
>> static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
>> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
>> index f3e7d7c..136d64f 100644
>> --- a/drivers/infiniband/hw/mlx5/main.c
>> +++ b/drivers/infiniband/hw/mlx5/main.c
>> @@ -1965,38 +1965,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
>> struct vm_area_struct *vma;
>> struct mlx5_ib_vma_private_data *vma_private, *n;
>> struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
>> - struct task_struct *owning_process = NULL;
>> - struct mm_struct *owning_mm = NULL;
>>
>> - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
>> - if (!owning_process)
>> - return;
>> -
>> - owning_mm = get_task_mm(owning_process);
>> - if (!owning_mm) {
>> - pr_info("no mm, disassociate ucontext is pending task termination\n");
>> - while (1) {
>> - put_task_struct(owning_process);
>> - usleep_range(1000, 2000);
>> - owning_process = get_pid_task(ibcontext->tgid,
>> - PIDTYPE_PID);
>> - if (!owning_process ||
>> - owning_process->state == TASK_DEAD) {
>> - pr_info("disassociate ucontext done, task was terminated\n");
>> - /* in case task was dead need to release the
>> - * task struct.
>> - */
>> - if (owning_process)
>> - put_task_struct(owning_process);
>> - return;
>> - }
>> - }
>> - }
>> -
>> - /* need to protect from a race on closing the vma as part of
>> - * mlx5_ib_vma_close.
>> - */
>> - down_write(&owning_mm->mmap_sem);
>> mutex_lock(&context->vma_private_list_mutex);
>> list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
>> list) {
>> @@ -2013,9 +1982,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
>> kfree(vma_private);
>> }
>> mutex_unlock(&context->vma_private_list_mutex);
>> - up_write(&owning_mm->mmap_sem);
>> - mmput(owning_mm);
>> - put_task_struct(owning_process);
>> }
>>
>> static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
>> --
>> 1.9.1
>>



2018-05-28 16:57:00

by Jason Gunthorpe

Subject: Re: [PATCH V3 rdma-next 4/4] RDMA/hns: Implement the disassociate_ucontext API

On Sat, May 26, 2018 at 04:41:47PM +0800, Wei Hu (Xavier) wrote:
> This patch implemented the IB core disassociate_ucontext API.
>
> Signed-off-by: Wei Hu (Xavier) <[email protected]>
>
> v2->v3: Addressed the comments from Jason. The related link:
> https://lkml.org/lkml/2018/5/22/967
> v1->v2: no change.
> drivers/infiniband/hw/hns/hns_roce_device.h | 8 ++++
> drivers/infiniband/hw/hns/hns_roce_main.c | 70 ++++++++++++++++++++++++++++-
> 2 files changed, 77 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index da8512b..31221d5 100644
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -217,11 +217,19 @@ struct hns_roce_uar {
> unsigned long logic_idx;
> };
>
> +struct hns_roce_vma_data {
> + struct list_head list;
> + struct vm_area_struct *vma;
> + struct mutex *vma_list_mutex;
> +};

This stuff is basically shared in all the drivers too - would it make
sense to maintain this information and do the zap_pte in the core code
as well?

There is no way to implement dis-associate without also doing the
zaps..

Jason
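
A rough sketch of what such core-maintained tracking could look like;
every name here (rdma_umap_priv, the umap_list/umap_mutex fields on
ib_ucontext, both helpers) is hypothetical and not part of this series:

#include <linux/mm.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/* Hypothetical core-side tracking entry, one per vma handed out by a
 * driver's mmap() method.
 */
struct rdma_umap_priv {
	struct vm_area_struct *vma;
	struct list_head list;		/* chained on the ucontext */
};

/* The driver (or a core mmap helper) would call this after a successful
 * io_remap_pfn_range(); umap_list/umap_mutex are assumed new fields on
 * struct ib_ucontext.
 */
static void rdma_umap_track(struct ib_ucontext *ucontext,
			    struct vm_area_struct *vma,
			    struct rdma_umap_priv *priv)
{
	priv->vma = vma;
	vma->vm_private_data = priv;
	/* vma->vm_ops would point at core-owned open/close handlers */

	mutex_lock(&ucontext->umap_mutex);
	list_add(&priv->list, &ucontext->umap_list);
	mutex_unlock(&ucontext->umap_mutex);
}

/* On disassociate, the core (already holding mmap_sem for write, see
 * patch 3/4) could then zap every tracked vma itself, and drivers
 * would not need their own disassociate_ucontext zapping loop.
 */
static void rdma_umap_zap_all(struct ib_ucontext *ucontext)
{
	struct rdma_umap_priv *priv, *n;

	mutex_lock(&ucontext->umap_mutex);
	list_for_each_entry_safe(priv, n, &ucontext->umap_list, list) {
		zap_vma_ptes(priv->vma, priv->vma->vm_start,
			     priv->vma->vm_end - priv->vma->vm_start);
		priv->vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
		priv->vma->vm_ops = NULL;
		list_del(&priv->list);
		kfree(priv);
	}
	mutex_unlock(&ucontext->umap_mutex);
}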

2018-05-29 11:39:12

by Wei Hu (Xavier)

Subject: Re: [PATCH V3 rdma-next 4/4] RDMA/hns: Implement the disassociate_ucontext API



On 2018/5/29 0:53, Jason Gunthorpe wrote:
> On Sat, May 26, 2018 at 04:41:47PM +0800, Wei Hu (Xavier) wrote:
>> This patch implemented the IB core disassociate_ucontext API.
>>
>> Signed-off-by: Wei Hu (Xavier) <[email protected]>
>>
>> v2->v3: Addressed the comments from Jason. The related link:
>> https://lkml.org/lkml/2018/5/22/967
>> v1->v2: no change.
>> drivers/infiniband/hw/hns/hns_roce_device.h | 8 ++++
>> drivers/infiniband/hw/hns/hns_roce_main.c | 70 ++++++++++++++++++++++++++++-
>> 2 files changed, 77 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index da8512b..31221d5 100644
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -217,11 +217,19 @@ struct hns_roce_uar {
>> unsigned long logic_idx;
>> };
>>
>> +struct hns_roce_vma_data {
>> + struct list_head list;
>> + struct vm_area_struct *vma;
>> + struct mutex *vma_list_mutex;
>> +};
> This stuff is basically shared in all the drivers too - would it make
> sense to maintain this information and do the zap_pte in the core code
> as well?
>
> There is no way to implement dis-associate without also doing the
> zaps..
>
> Jason
>
> .
Hi, Jason
You may have noticed that there are differences in the zap_vma_ptes
handling between mlx4_ib_disassociate_ucontext and
mlx5_ib_disassociate_ucontext. If I hoist the zap_vma_ptes processing
into the core code, it may need more changes to the mlx4 code.
Maybe I am not the right person to do this; could you give more
suggestions?
Thanks

Regards
Wei Hu
>