2017-04-10 08:29:18

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH] mm/madvise: Clean up MADV_SOFT_OFFLINE and MADV_HWPOISON

This cleans up the handling of MADV_SOFT_OFFLINE and MADV_HWPOISON
requests made through the madvise() system call.

* The name madvise_hwpoison() was misleading because the function
handles both memory_failure() and soft_offline_page(). Basically it
handles memory error injection from user space, which can go either
way as memory failure or soft offline. Renamed it to
madvise_inject_error() instead.

* Renamed struct page pointer 'p' to 'page'.

* pr_info() was printing the PFN value but labelled it 'page', which
was misleading. It now says 'pfn' and explicitly labels the process
virtual address.

Before the patch:

[97216.813999] Soft offlining page 0x15e3e at 0x3fff8c230000
[97234.670320] Soft offlining page 0x1f3 at 0x3fffa0da0000
[97318.817426] Soft offlining page 0x744 at 0x3fff7d200000
[97319.537899] Soft offlining page 0x1634d at 0x3fff95e20000
[97319.538528] Soft offlining page 0x16349 at 0x3fff95e30000
[97326.714138] Soft offlining page 0x1d6 at 0x3fff9e8b0000
[97327.334351] Soft offlining page 0x5f3 at 0x3fff91bd0000

[97593.860913] Injecting memory failure for page 0x15c8b at 0x3fff83280000
[97593.861757] Injecting memory failure for page 0x16190 at 0x3fff83290000
[97594.430585] Injecting memory failure for page 0x740 at 0x3fff9a2e0000
[97594.431289] Injecting memory failure for page 0x741 at 0x3fff9a2f0000

After the patch:

[ 707.219172] Soft offlining pfn 0x1484e at process virtual address 0x3fff883c0000
[ 707.219178] Soft offlining pfn 0x1484f at process virtual address 0x3fff883d0000
[ 707.219185] Soft offlining pfn 0x14850 at process virtual address 0x3fff883e0000
[ 707.219192] Soft offlining pfn 0x14851 at process virtual address 0x3fff883f0000
[ 707.219199] Soft offlining pfn 0x14852 at process virtual address 0x3fff88400000
[ 707.219207] Soft offlining pfn 0x14853 at process virtual address 0x3fff88410000
[ 707.219214] Soft offlining pfn 0x14854 at process virtual address 0x3fff88420000
[ 710.231938] Soft offlining pfn 0x1521c at process virtual address 0x3fff6bc70000

[ 746.630823] Injecting memory failure for pfn 0x10fcf at process virtual address 0x3fff86310000
[ 746.630832] Injecting memory failure for pfn 0x10fd0 at process virtual address 0x3fff86320000
[ 746.630842] Injecting memory failure for pfn 0x10fd1 at process virtual address 0x3fff86330000
[ 746.630851] Injecting memory failure for pfn 0x10fd2 at process virtual address 0x3fff86340000
[ 746.630861] Injecting memory failure for pfn 0x10fd3 at process virtual address 0x3fff86350000
[ 746.630870] Injecting memory failure for pfn 0x10fd4 at process virtual address 0x3fff86360000
[ 746.630880] Injecting memory failure for pfn 0x10fd5 at process virtual address 0x3fff86370000

Signed-off-by: Anshuman Khandual <[email protected]>
---
mm/madvise.c | 34 ++++++++++++++++++++--------------
1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 7a2abf0..efd4721 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -606,34 +606,40 @@ static long madvise_remove(struct vm_area_struct *vma,
/*
* Error injection support for memory error handling.
*/
-static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
+static int madvise_inject_error(int behavior,
+ unsigned long start, unsigned long end)
{
- struct page *p;
+ struct page *page;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+
for (; start < end; start += PAGE_SIZE <<
- compound_order(compound_head(p))) {
+ compound_order(compound_head(page))) {
int ret;

- ret = get_user_pages_fast(start, 1, 0, &p);
+ ret = get_user_pages_fast(start, 1, 0, &page);
if (ret != 1)
return ret;

- if (PageHWPoison(p)) {
- put_page(p);
+ if (PageHWPoison(page)) {
+ put_page(page);
continue;
}
- if (bhv == MADV_SOFT_OFFLINE) {
- pr_info("Soft offlining page %#lx at %#lx\n",
- page_to_pfn(p), start);
- ret = soft_offline_page(p, MF_COUNT_INCREASED);
+
+ if (behavior == MADV_SOFT_OFFLINE) {
+ pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n",
+ page_to_pfn(page), start);
+
+ ret = soft_offline_page(page, MF_COUNT_INCREASED);
if (ret)
return ret;
continue;
}
- pr_info("Injecting memory failure for page %#lx at %#lx\n",
- page_to_pfn(p), start);
- ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
+ pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
+ page_to_pfn(page), start);
+
+ ret = memory_failure(page_to_pfn(page), 0, MF_COUNT_INCREASED);
if (ret)
return ret;
}
@@ -763,7 +769,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)

#ifdef CONFIG_MEMORY_FAILURE
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
- return madvise_hwpoison(behavior, start, start+len_in);
+ return madvise_inject_error(behavior, start, start + len_in);
#endif
if (!madvise_behavior_valid(behavior))
return error;
--
1.8.5.2


2017-04-10 08:48:06

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH RESEND] mm/madvise: Clean up MADV_SOFT_OFFLINE and MADV_HWPOISON

This cleans up the handling of MADV_SOFT_OFFLINE and MADV_HWPOISON
requests made through the madvise() system call.

* The name madvise_hwpoison() was misleading because the function
handles both memory_failure() and soft_offline_page(). Basically it
handles memory error injection from user space, which can go either
way as memory failure or soft offline. Renamed it to
madvise_inject_error() instead.

* Renamed struct page pointer 'p' to 'page'.

* pr_info() was printing the PFN value but labelled it 'page', which
was misleading. It now says 'pfn' and explicitly labels the process
virtual address.

Before the patch:

Soft offlining page 0x15e3e at 0x3fff8c230000
Soft offlining page 0x1f3 at 0x3fffa0da0000
Soft offlining page 0x744 at 0x3fff7d200000
Soft offlining page 0x1634d at 0x3fff95e20000
Soft offlining page 0x16349 at 0x3fff95e30000
Soft offlining page 0x1d6 at 0x3fff9e8b0000
Soft offlining page 0x5f3 at 0x3fff91bd0000

Injecting memory failure for page 0x15c8b at 0x3fff83280000
Injecting memory failure for page 0x16190 at 0x3fff83290000
Injecting memory failure for page 0x740 at 0x3fff9a2e0000
Injecting memory failure for page 0x741 at 0x3fff9a2f0000

After the patch:

Soft offlining pfn 0x1484e at process virtual address 0x3fff883c0000
Soft offlining pfn 0x1484f at process virtual address 0x3fff883d0000
Soft offlining pfn 0x14850 at process virtual address 0x3fff883e0000
Soft offlining pfn 0x14851 at process virtual address 0x3fff883f0000
Soft offlining pfn 0x14852 at process virtual address 0x3fff88400000
Soft offlining pfn 0x14853 at process virtual address 0x3fff88410000
Soft offlining pfn 0x14854 at process virtual address 0x3fff88420000
Soft offlining pfn 0x1521c at process virtual address 0x3fff6bc70000

Injecting memory failure for pfn 0x10fcf at process virtual address 0x3fff86310000
Injecting memory failure for pfn 0x10fd0 at process virtual address 0x3fff86320000
Injecting memory failure for pfn 0x10fd1 at process virtual address 0x3fff86330000
Injecting memory failure for pfn 0x10fd2 at process virtual address 0x3fff86340000
Injecting memory failure for pfn 0x10fd3 at process virtual address 0x3fff86350000
Injecting memory failure for pfn 0x10fd4 at process virtual address 0x3fff86360000
Injecting memory failure for pfn 0x10fd5 at process virtual address 0x3fff86370000

Signed-off-by: Anshuman Khandual <[email protected]>
---
Removed the timestamps from the kernel log excerpts to reduce the
width of the commit message. No changes in the code.

mm/madvise.c | 34 ++++++++++++++++++++--------------
1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 7a2abf0..efd4721 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -606,34 +606,40 @@ static long madvise_remove(struct vm_area_struct *vma,
/*
* Error injection support for memory error handling.
*/
-static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
+static int madvise_inject_error(int behavior,
+ unsigned long start, unsigned long end)
{
- struct page *p;
+ struct page *page;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+
for (; start < end; start += PAGE_SIZE <<
- compound_order(compound_head(p))) {
+ compound_order(compound_head(page))) {
int ret;

- ret = get_user_pages_fast(start, 1, 0, &p);
+ ret = get_user_pages_fast(start, 1, 0, &page);
if (ret != 1)
return ret;

- if (PageHWPoison(p)) {
- put_page(p);
+ if (PageHWPoison(page)) {
+ put_page(page);
continue;
}
- if (bhv == MADV_SOFT_OFFLINE) {
- pr_info("Soft offlining page %#lx at %#lx\n",
- page_to_pfn(p), start);
- ret = soft_offline_page(p, MF_COUNT_INCREASED);
+
+ if (behavior == MADV_SOFT_OFFLINE) {
+ pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n",
+ page_to_pfn(page), start);
+
+ ret = soft_offline_page(page, MF_COUNT_INCREASED);
if (ret)
return ret;
continue;
}
- pr_info("Injecting memory failure for page %#lx at %#lx\n",
- page_to_pfn(p), start);
- ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
+ pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
+ page_to_pfn(page), start);
+
+ ret = memory_failure(page_to_pfn(page), 0, MF_COUNT_INCREASED);
if (ret)
return ret;
}
@@ -763,7 +769,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)

#ifdef CONFIG_MEMORY_FAILURE
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
- return madvise_hwpoison(behavior, start, start+len_in);
+ return madvise_inject_error(behavior, start, start + len_in);
#endif
if (!madvise_behavior_valid(behavior))
return error;
--
1.8.5.2

2017-04-12 04:55:27

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH RESEND] mm/madvise: Clean up MADV_SOFT_OFFLINE and MADV_HWPOISON

On Mon, Apr 10, 2017 at 02:17:01PM +0530, Anshuman Khandual wrote:
> This cleans up handling MADV_SOFT_OFFLINE and MADV_HWPOISON called
> through madvise() system call.
>
> * madvise_memory_failure() was misleading to accommodate handling of
> both memory_failure() as well as soft_offline_page() functions.
> Basically it handles memory error injection from user space which
> can go either way as memory failure or soft offline. Renamed as
> madvise_inject_error() instead.
>
> * Renamed struct page pointer 'p' to 'page'.
>
> * pr_info() was essentially printing PFN value but it said 'page'
> which was misleading. Made the process virtual address explicit.
>
> Before the patch:
>
> Soft offlining page 0x15e3e at 0x3fff8c230000
> Soft offlining page 0x1f3 at 0x3fffa0da0000
> Soft offlining page 0x744 at 0x3fff7d200000
> Soft offlining page 0x1634d at 0x3fff95e20000
> Soft offlining page 0x16349 at 0x3fff95e30000
> Soft offlining page 0x1d6 at 0x3fff9e8b0000
> Soft offlining page 0x5f3 at 0x3fff91bd0000
>
> Injecting memory failure for page 0x15c8b at 0x3fff83280000
> Injecting memory failure for page 0x16190 at 0x3fff83290000
> Injecting memory failure for page 0x740 at 0x3fff9a2e0000
> Injecting memory failure for page 0x741 at 0x3fff9a2f0000
>
> After the patch:
>
> Soft offlining pfn 0x1484e at process virtual address 0x3fff883c0000
> Soft offlining pfn 0x1484f at process virtual address 0x3fff883d0000
> Soft offlining pfn 0x14850 at process virtual address 0x3fff883e0000
> Soft offlining pfn 0x14851 at process virtual address 0x3fff883f0000
> Soft offlining pfn 0x14852 at process virtual address 0x3fff88400000
> Soft offlining pfn 0x14853 at process virtual address 0x3fff88410000
> Soft offlining pfn 0x14854 at process virtual address 0x3fff88420000
> Soft offlining pfn 0x1521c at process virtual address 0x3fff6bc70000
>
> Injecting memory failure for pfn 0x10fcf at process virtual address 0x3fff86310000
> Injecting memory failure for pfn 0x10fd0 at process virtual address 0x3fff86320000
> Injecting memory failure for pfn 0x10fd1 at process virtual address 0x3fff86330000
> Injecting memory failure for pfn 0x10fd2 at process virtual address 0x3fff86340000
> Injecting memory failure for pfn 0x10fd3 at process virtual address 0x3fff86350000
> Injecting memory failure for pfn 0x10fd4 at process virtual address 0x3fff86360000
> Injecting memory failure for pfn 0x10fd5 at process virtual address 0x3fff86370000
>
> Signed-off-by: Anshuman Khandual <[email protected]>

Reviewed-by: Naoya Horiguchi <[email protected]>

> ---
> Removed timestamp from the kernel log to reduce the width of the
> commit message. No changes in the code.
>
> mm/madvise.c | 34 ++++++++++++++++++++--------------
> 1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 7a2abf0..efd4721 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -606,34 +606,40 @@ static long madvise_remove(struct vm_area_struct *vma,
> /*
> * Error injection support for memory error handling.
> */
> -static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
> +static int madvise_inject_error(int behavior,
> + unsigned long start, unsigned long end)
> {
> - struct page *p;
> + struct page *page;
> +
> if (!capable(CAP_SYS_ADMIN))
> return -EPERM;
> +
> for (; start < end; start += PAGE_SIZE <<
> - compound_order(compound_head(p))) {
> + compound_order(compound_head(page))) {
> int ret;
>
> - ret = get_user_pages_fast(start, 1, 0, &p);
> + ret = get_user_pages_fast(start, 1, 0, &page);
> if (ret != 1)
> return ret;
>
> - if (PageHWPoison(p)) {
> - put_page(p);
> + if (PageHWPoison(page)) {
> + put_page(page);
> continue;
> }
> - if (bhv == MADV_SOFT_OFFLINE) {
> - pr_info("Soft offlining page %#lx at %#lx\n",
> - page_to_pfn(p), start);
> - ret = soft_offline_page(p, MF_COUNT_INCREASED);
> +
> + if (behavior == MADV_SOFT_OFFLINE) {
> + pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n",
> + page_to_pfn(page), start);
> +
> + ret = soft_offline_page(page, MF_COUNT_INCREASED);
> if (ret)
> return ret;
> continue;
> }
> - pr_info("Injecting memory failure for page %#lx at %#lx\n",
> - page_to_pfn(p), start);
> - ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
> + pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
> + page_to_pfn(page), start);
> +
> + ret = memory_failure(page_to_pfn(page), 0, MF_COUNT_INCREASED);
> if (ret)
> return ret;
> }
> @@ -763,7 +769,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
>
> #ifdef CONFIG_MEMORY_FAILURE
> if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
> - return madvise_hwpoison(behavior, start, start+len_in);
> + return madvise_inject_error(behavior, start, start + len_in);
> #endif
> if (!madvise_behavior_valid(behavior))
> return error;
> --
> 1.8.5.2
>
>