2018-08-27 11:28:19

by Michal Hocko

[permalink] [raw]
Subject: [PATCH 0/3] mmu_notifiers follow ups

Hi Andrew,
Tetsuo has noticed some fallouts from 93065ac753e4 ("mm, oom:
distinguish blockable mode for mmu notifiers"). One of them has
been fixed and picked up by AMD/DRM maintainer [1]. XEN issue is
fixed by patch 1. I have also clarified expectations about blockable
semantic of invalidate_range_end. Finally the last patch removes
MMU_INVALIDATE_DOES_NOT_BLOCK which is no longer used nor needed.

[1] http://lkml.kernel.org/r/[email protected]




2018-08-27 11:28:10

by Michal Hocko

[permalink] [raw]
Subject: [PATCH 3/3] Revert "mm, mmu_notifier: annotate mmu notifiers with blockable invalidate callbacks"

From: Michal Hocko <[email protected]>

This reverts commit 5ff7091f5a2ca1b7b642ca0dbdede8f693a56926.

MMU_INVALIDATE_DOES_NOT_BLOCK flags was the only one used and it is no
longer needed since 93065ac753e4 ("mm, oom: distinguish blockable mode
for mmu notifiers"). We now have a full support for per range !blocking
behavior so we can drop the stop gap workaround which the per notifier
flag was used for.

Cc: David Rientjes <[email protected]>
Signed-off-by: Michal Hocko <[email protected]>
---
drivers/infiniband/hw/hfi1/mmu_rb.c | 1 -
drivers/iommu/amd_iommu_v2.c | 1 -
drivers/iommu/intel-svm.c | 1 -
drivers/misc/sgi-gru/grutlbpurge.c | 1 -
include/linux/mmu_notifier.h | 23 ---------------------
mm/mmu_notifier.c | 31 -----------------------------
virt/kvm/kvm_main.c | 1 -
7 files changed, 59 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index e1c7996c018e..475b769e120c 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -77,7 +77,6 @@ static void do_remove(struct mmu_rb_handler *handler,
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.invalidate_range_start = mmu_notifier_range_start,
};

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 58da65df03f5..fd552235bd13 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -427,7 +427,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
}

static const struct mmu_notifier_ops iommu_mn = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.release = mn_release,
.clear_flush_young = mn_clear_flush_young,
.invalidate_range = mn_invalidate_range,
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 7d65aab36a96..e16ee247add8 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -292,7 +292,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
}

static const struct mmu_notifier_ops intel_mmuops = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.release = intel_mm_release,
.change_pte = intel_change_pte,
.invalidate_range = intel_invalidate_range,
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index be28f05bfafa..03b49d52092e 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -261,7 +261,6 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)


static const struct mmu_notifier_ops gru_mmuops = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.invalidate_range_start = gru_invalidate_range_start,
.invalidate_range_end = gru_invalidate_range_end,
.release = gru_release,
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 698e371aafe3..9893a6432adf 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -2,7 +2,6 @@
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

-#include <linux/types.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
@@ -11,9 +10,6 @@
struct mmu_notifier;
struct mmu_notifier_ops;

-/* mmu_notifier_ops flags */
-#define MMU_INVALIDATE_DOES_NOT_BLOCK (0x01)
-
#ifdef CONFIG_MMU_NOTIFIER

/*
@@ -30,15 +26,6 @@ struct mmu_notifier_mm {
};

struct mmu_notifier_ops {
- /*
- * Flags to specify behavior of callbacks for this MMU notifier.
- * Used to determine which context an operation may be called.
- *
- * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not
- * block
- */
- int flags;
-
/*
* Called either by mmu_notifier_unregister or when the mm is
* being destroyed by exit_mmap, always before all pages are
@@ -183,10 +170,6 @@ struct mmu_notifier_ops {
* Note that this function might be called with just a sub-range
* of what was passed to invalidate_range_start()/end(), if
* called between those functions.
- *
- * If this callback cannot block, and invalidate_range_{start,end}
- * cannot block, mmu_notifier_ops.flags should have
- * MMU_INVALIDATE_DOES_NOT_BLOCK set.
*/
void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
unsigned long start, unsigned long end);
@@ -241,7 +224,6 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
unsigned long start, unsigned long end);
-extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);

static inline void mmu_notifier_release(struct mm_struct *mm)
{
@@ -495,11 +477,6 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
{
}

-static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
-{
- return false;
-}
-
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 82bb1a939c0e..5119ff846769 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -247,37 +247,6 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);

-/*
- * Must be called while holding mm->mmap_sem for either read or write.
- * The result is guaranteed to be valid until mm->mmap_sem is dropped.
- */
-bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
-{
- struct mmu_notifier *mn;
- int id;
- bool ret = false;
-
- WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
-
- if (!mm_has_notifiers(mm))
- return ret;
-
- id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
- if (!mn->ops->invalidate_range &&
- !mn->ops->invalidate_range_start &&
- !mn->ops->invalidate_range_end)
- continue;
-
- if (!(mn->ops->flags & MMU_INVALIDATE_DOES_NOT_BLOCK)) {
- ret = true;
- break;
- }
- }
- srcu_read_unlock(&srcu, id);
- return ret;
-}
-
static int do_mmu_notifier_register(struct mmu_notifier *mn,
struct mm_struct *mm,
int take_mmap_sem)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f986e31fa68c..636462edc7ca 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -499,7 +499,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
}

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
--
2.18.0


2018-08-27 11:28:25

by Michal Hocko

[permalink] [raw]
Subject: [PATCH 2/3] mm, mmu_notifier: be explicit about range invalition non-blocking mode

From: Michal Hocko <[email protected]>

If invalidate_range_start is called for !blocking mode then all
callbacks have to guarantee they will no block/sleep. The same obviously
applies to invalidate_range_end because this operation pairs with the
former and they are called from the same context. Make sure this is
appropriately documented.

Cc: Jerome Glisse <[email protected]>
Signed-off-by: Michal Hocko <[email protected]>
---
include/linux/mmu_notifier.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 133ba78820ee..698e371aafe3 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -153,7 +153,9 @@ struct mmu_notifier_ops {
*
* If blockable argument is set to false then the callback cannot
* sleep and has to return with -EAGAIN. 0 should be returned
- * otherwise.
+ * otherwise. Please note that if invalidate_range_start approves
+ * a non-blocking behavior then the same applies to
+ * invalidate_range_end.
*
*/
int (*invalidate_range_start)(struct mmu_notifier *mn,
--
2.18.0


2018-08-27 11:28:58

by Michal Hocko

[permalink] [raw]
Subject: [PATCH 1/3] xen/gntdev: fix up blockable calls to mn_invl_range_start

From: Michal Hocko <[email protected]>

93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
has introduced blockable parameter to all mmu_notifiers and the notifier
has to back off when called in !blockable case and it could block down
the road.

The above commit implemented that for mn_invl_range_start but both
in_range checks are done unconditionally regardless of the blockable
mode and as such they would fail all the time for regular calls.
Fix this by checking blockable parameter as well.

Once we are there we can remove the stale TODO. The lock has to be
sleepable because we wait for completion down in gnttab_unmap_refs_sync.

Changes since v1
- pull in_range check into mn_invl_range_start - Juergen

Fixes: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
Cc: Boris Ostrovsky <[email protected]>
Cc: Juergen Gross <[email protected]>
Signed-off-by: Michal Hocko <[email protected]>
---
drivers/xen/gntdev.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 57390c7666e5..b0b02a501167 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -492,12 +492,19 @@ static bool in_range(struct gntdev_grant_map *map,
return true;
}

-static void unmap_if_in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end)
+static int unmap_if_in_range(struct gntdev_grant_map *map,
+ unsigned long start, unsigned long end,
+ bool blockable)
{
unsigned long mstart, mend;
int err;

+ if (!in_range(map, start, end))
+ return 0;
+
+ if (!blockable)
+ return -EAGAIN;
+
mstart = max(start, map->vma->vm_start);
mend = min(end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
@@ -508,6 +515,8 @@ static void unmap_if_in_range(struct gntdev_grant_map *map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
+
+ return 0;
}

static int mn_invl_range_start(struct mmu_notifier *mn,
@@ -519,25 +528,20 @@ static int mn_invl_range_start(struct mmu_notifier *mn,
struct gntdev_grant_map *map;
int ret = 0;

- /* TODO do we really need a mutex here? */
if (blockable)
mutex_lock(&priv->lock);
else if (!mutex_trylock(&priv->lock))
return -EAGAIN;

list_for_each_entry(map, &priv->maps, next) {
- if (in_range(map, start, end)) {
- ret = -EAGAIN;
+ ret = unmap_if_in_range(map, start, end, blockable);
+ if (ret)
goto out_unlock;
- }
- unmap_if_in_range(map, start, end);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
- if (in_range(map, start, end)) {
- ret = -EAGAIN;
+ ret = unmap_if_in_range(map, start, end, blockable);
+ if (ret)
goto out_unlock;
- }
- unmap_if_in_range(map, start, end);
}

out_unlock:
--
2.18.0


2018-08-27 13:44:01

by Jerome Glisse

[permalink] [raw]
Subject: Re: [PATCH 2/3] mm, mmu_notifier: be explicit about range invalition non-blocking mode

On Mon, Aug 27, 2018 at 01:26:22PM +0200, Michal Hocko wrote:
> From: Michal Hocko <[email protected]>
>
> If invalidate_range_start is called for !blocking mode then all
> callbacks have to guarantee they will no block/sleep. The same obviously
> applies to invalidate_range_end because this operation pairs with the
> former and they are called from the same context. Make sure this is
> appropriately documented.
>
> Signed-off-by: Michal Hocko <[email protected]>

Reviewed-by: Jerome Glisse <[email protected]>


> ---
> include/linux/mmu_notifier.h | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index 133ba78820ee..698e371aafe3 100644
> --- a/include/linux/mmu_notifier.h
> +++ b/include/linux/mmu_notifier.h
> @@ -153,7 +153,9 @@ struct mmu_notifier_ops {
> *
> * If blockable argument is set to false then the callback cannot
> * sleep and has to return with -EAGAIN. 0 should be returned
> - * otherwise.
> + * otherwise. Please note that if invalidate_range_start approves
> + * a non-blocking behavior then the same applies to
> + * invalidate_range_end.
> *
> */
> int (*invalidate_range_start)(struct mmu_notifier *mn,
> --
> 2.18.0
>

2018-08-27 20:05:27

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH 1/3] xen/gntdev: fix up blockable calls to mn_invl_range_start

On 08/27/2018 07:26 AM, Michal Hocko wrote:
> From: Michal Hocko <[email protected]>
>
> 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> has introduced blockable parameter to all mmu_notifiers and the notifier
> has to back off when called in !blockable case and it could block down
> the road.
>
> The above commit implemented that for mn_invl_range_start but both
> in_range checks are done unconditionally regardless of the blockable
> mode and as such they would fail all the time for regular calls.
> Fix this by checking blockable parameter as well.
>
> Once we are there we can remove the stale TODO. The lock has to be
> sleepable because we wait for completion down in gnttab_unmap_refs_sync.
>
> Changes since v1
> - pull in_range check into mn_invl_range_start - Juergen
>
> Fixes: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> Cc: Boris Ostrovsky <[email protected]>
> Cc: Juergen Gross <[email protected]>
> Signed-off-by: Michal Hocko <[email protected]>


LGTM, although in_range() has a single call site so we really don't need it.

I'll wait for Juergen to bless this since he asked for this approach.

-boris


> ---
> drivers/xen/gntdev.c | 26 +++++++++++++++-----------
> 1 file changed, 15 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 57390c7666e5..b0b02a501167 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -492,12 +492,19 @@ static bool in_range(struct gntdev_grant_map *map,
> return true;
> }
>
> -static void unmap_if_in_range(struct gntdev_grant_map *map,
> - unsigned long start, unsigned long end)
> +static int unmap_if_in_range(struct gntdev_grant_map *map,
> + unsigned long start, unsigned long end,
> + bool blockable)
> {
> unsigned long mstart, mend;
> int err;
>
> + if (!in_range(map, start, end))
> + return 0;
> +
> + if (!blockable)
> + return -EAGAIN;
> +
> mstart = max(start, map->vma->vm_start);
> mend = min(end, map->vma->vm_end);
> pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
> @@ -508,6 +515,8 @@ static void unmap_if_in_range(struct gntdev_grant_map *map,
> (mstart - map->vma->vm_start) >> PAGE_SHIFT,
> (mend - mstart) >> PAGE_SHIFT);
> WARN_ON(err);
> +
> + return 0;
> }
>
> static int mn_invl_range_start(struct mmu_notifier *mn,
> @@ -519,25 +528,20 @@ static int mn_invl_range_start(struct mmu_notifier *mn,
> struct gntdev_grant_map *map;
> int ret = 0;
>
> - /* TODO do we really need a mutex here? */
> if (blockable)
> mutex_lock(&priv->lock);
> else if (!mutex_trylock(&priv->lock))
> return -EAGAIN;
>
> list_for_each_entry(map, &priv->maps, next) {
> - if (in_range(map, start, end)) {
> - ret = -EAGAIN;
> + ret = unmap_if_in_range(map, start, end, blockable);
> + if (ret)
> goto out_unlock;
> - }
> - unmap_if_in_range(map, start, end);
> }
> list_for_each_entry(map, &priv->freeable_maps, next) {
> - if (in_range(map, start, end)) {
> - ret = -EAGAIN;
> + ret = unmap_if_in_range(map, start, end, blockable);
> + if (ret)
> goto out_unlock;
> - }
> - unmap_if_in_range(map, start, end);
> }
>
> out_unlock:


2018-08-28 06:05:23

by Jürgen Groß

[permalink] [raw]
Subject: Re: [PATCH 1/3] xen/gntdev: fix up blockable calls to mn_invl_range_start

On 27/08/18 13:26, Michal Hocko wrote:
> From: Michal Hocko <[email protected]>
>
> 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> has introduced blockable parameter to all mmu_notifiers and the notifier
> has to back off when called in !blockable case and it could block down
> the road.
>
> The above commit implemented that for mn_invl_range_start but both
> in_range checks are done unconditionally regardless of the blockable
> mode and as such they would fail all the time for regular calls.
> Fix this by checking blockable parameter as well.
>
> Once we are there we can remove the stale TODO. The lock has to be
> sleepable because we wait for completion down in gnttab_unmap_refs_sync.
>
> Changes since v1
> - pull in_range check into mn_invl_range_start - Juergen
>
> Fixes: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> Cc: Boris Ostrovsky <[email protected]>
> Cc: Juergen Gross <[email protected]>
> Signed-off-by: Michal Hocko <[email protected]>

Reviewed-by: Juergen Gross <[email protected]>


Juergen