2012-02-20 06:01:21

by Li Zhong

[permalink] [raw]
Subject: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

If CONFIG_KMEMCHECK is enabled, there might be page faults in nmi if the
pages are marked as not present by kmemcheck, like following:

[ 4.535803] WARNING: at arch/x86/mm/kmemcheck/kmemcheck.c:634 kmemcheck_fault+0xb9/0xd0()
[ 4.633429] Hardware name: System x3650 M3 -[7945AC1]-
[ 4.694710] Modules linked in:
[ 4.731105] Pid: 1, comm: swapper/0 Not tainted 3.3.0-rc3 #15
[ 4.799654] Call Trace:
[ 4.828751] <NMI> [<ffffffff81042eca>] warn_slowpath_common+0x7a/0xb0
[ 4.907713] [<ffffffff81042f15>] warn_slowpath_null+0x15/0x20
[ 4.977301] [<ffffffff8103ce89>] kmemcheck_fault+0xb9/0xd0
[ 5.043778] [<ffffffff81551ba6>] do_page_fault+0x406/0x550
[ 5.110252] [<ffffffff8154e235>] page_fault+0x25/0x30
[ 5.171535] [<ffffffff8154f005>] ? nmi_handle.clone.1+0x75/0xc0
[ 5.243202] [<ffffffff8154efcf>] ? nmi_handle.clone.1+0x3f/0xc0
[ 5.314867] [<ffffffff8154ef90>] ? __die+0xf0/0xf0
[ 5.373038] [<ffffffff8154f15f>] do_nmi+0x10f/0x360
[ 5.432243] [<ffffffff8154e5cd>] restart_nmi+0x1a/0x1e
[ 5.494565] [<ffffffff8154e210>] ? general_protection+0x30/0x30
[ 5.566234] [<ffffffff8154e210>] ? general_protection+0x30/0x30
[ 5.637898] [<ffffffff8154e210>] ? general_protection+0x30/0x30
[ 5.709566] <<EOE>> [<ffffffff8126d814>] ? rb_insert_color+0xa4/0x150
[ 5.788526] [<ffffffff8119d17b>] sysfs_link_sibling+0x8b/0x110
[ 5.859155] [<ffffffff8119dff1>] __sysfs_add_one+0xc1/0x100
[ 5.926666] [<ffffffff8119e056>] sysfs_add_one+0x26/0xd0
[ 5.991065] [<ffffffff8119cdf4>] sysfs_add_file_mode+0xc4/0x100
[ 6.062731] [<ffffffff8119fc41>] internal_create_group+0xc1/0x1a0
[ 6.136473] [<ffffffff8119fd4e>] sysfs_create_group+0xe/0x10
[ 6.205026] [<ffffffff81351c1a>] dpm_sysfs_add+0x2a/0xd0
[ 6.269425] [<ffffffff81349bf5>] device_add+0x5e5/0x730
[ 6.332783] [<ffffffff81349d59>] device_register+0x19/0x20
[ 6.399260] [<ffffffff8135b6b8>] add_memory_section+0x158/0x1e0
[ 6.470927] [<ffffffff81ca757e>] memory_dev_init+0x75/0x108
[ 6.538439] [<ffffffff81ca73a9>] driver_init+0x31/0x33
[ 6.600762] [<ffffffff81c72c68>] kernel_init+0xcc/0x169
[ 6.664121] [<ffffffff81555e64>] kernel_thread_helper+0x4/0x10
[ 6.734749] [<ffffffff81c72b9c>] ? start_kernel+0x3ab/0x3ab
[ 6.802261] [<ffffffff81555e60>] ? gs_change+0x13/0x13
[ 6.864585] ---[ end trace a7919e7f17c0a725 ]---

These two patches tries to fix some of the problems by avoiding using the
non-present pages.


2012-02-20 06:05:28

by Li Zhong

[permalink] [raw]
Subject: [PATCH 1/2 x86] fix page faults by nmi handler in nmi if kmemcheck is enabled

This patch tries to
change the nmi handler name from a pointer to an array.
use __get_free_pages instead of kzalloc if CONFIG_KMEMCHECK is
enabled.

Signed-off-by: Li Zhong <[email protected]>
---
arch/x86/kernel/nmi.c | 23 +++++++++++++++--------
1 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf3..84aa03a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -36,7 +36,7 @@ struct nmiaction {
struct list_head list;
nmi_handler_t handler;
unsigned int flags;
- char *name;
+ char name[NMI_MAX_NAMELEN];
};

struct nmi_desc {
@@ -169,16 +169,18 @@ int register_nmi_handler(unsigned int type,
nmi_handler_t handler,
if (!handler)
return -EINVAL;

+#ifdef CONFIG_KMEMCHECK
+ action = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(sizeof(*action)));
+#else
action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
+#endif
if (!action)
goto fail_action;

action->handler = handler;
action->flags = nmiflags;
- action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
- if (!action->name)
- goto fail_action_name;
-
+ strncpy(action->name, devname, sizeof(action->name));
retval = __setup_nmi(type, action);

if (retval)
@@ -187,9 +189,11 @@ int register_nmi_handler(unsigned int type,
nmi_handler_t handler,
return retval;

fail_setup_nmi:
- kfree(action->name);
-fail_action_name:
+#ifdef CONFIG_KMEMCHECK
+ free_pages((unsigned long)action, get_order(sizeof(*action)));
+#else
kfree(action);
+#endif
fail_action:

return retval;
@@ -202,8 +206,11 @@ void unregister_nmi_handler(unsigned int type,
const char *name)

a = __free_nmi(type, name);
if (a) {
- kfree(a->name);
+#ifdef CONFIG_KMEMCHECK
+ free_pages((unsigned long)a, get_order(sizeof(*a)));
+#else
kfree(a);
+#endif
}
}

--
1.7.5.4

2012-02-20 06:07:55

by Li Zhong

[permalink] [raw]
Subject: [PATCH 2/2 x86] fix page faults by perf events in nmi if kmemcheck is enabled

This patch tries to use __get_free_pages for the perf_event and it's
callchain buffers if CONFIG_KMEMCHECK is enabled.

Signed-off-by: Li Zhong <[email protected]>
---
kernel/events/callchain.c | 29 +++++++++++++++++++++++++++++
kernel/events/core.c | 13 +++++++++++++
2 files changed, 42 insertions(+), 0 deletions(-)

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 6581a04..57f5eaa 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -38,13 +38,25 @@ static void release_callchain_buffers_rcu(struct
rcu_head *head)
{
struct callchain_cpus_entries *entries;
int cpu;
+#ifdef CONFIG_KMEMCHECK
+ int size;
+
+ size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+#endif

entries = container_of(head, struct callchain_cpus_entries, rcu_head);

for_each_possible_cpu(cpu)
+#ifdef CONFIG_KMEMCHECK
+ free_pages((unsigned long)entries->cpu_entries[cpu],
+ get_order(size));
+ size = offsetof(struct callchain_cpus_entries,
cpu_entries[nr_cpu_ids]);
+ free_pages((unsigned long)entries, get_order(size));
+#else
kfree(entries->cpu_entries[cpu]);

kfree(entries);
+#endif
}

static void release_callchain_buffers(void)
@@ -69,15 +81,25 @@ static int alloc_callchain_buffers(void)
*/
size = offsetof(struct callchain_cpus_entries,
cpu_entries[nr_cpu_ids]);

+#ifdef CONFIG_KMEMCHECK
+ entries = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(size));
+#else
entries = kzalloc(size, GFP_KERNEL);
+#endif
if (!entries)
return -ENOMEM;

size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;

for_each_possible_cpu(cpu) {
+#ifdef CONFIG_KMEMCHECK
+ entries->cpu_entries[cpu] = (void *)__get_free_pages(
+ GFP_KERNEL, get_order(size));
+#else
entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
cpu_to_node(cpu));
+#endif
if (!entries->cpu_entries[cpu])
goto fail;
}
@@ -88,8 +110,15 @@ static int alloc_callchain_buffers(void)

fail:
for_each_possible_cpu(cpu)
+#ifdef CONFIG_KMEMCHECK
+ free_pages((unsigned long)entries->cpu_entries[cpu],
+ get_order(size));
+ size = offsetof(struct callchain_cpus_entries,
cpu_entries[nr_cpu_ids]);
+ free_pages((unsigned long)entries, get_order(size));
+#else
kfree(entries->cpu_entries[cpu]);
kfree(entries);
+#endif

return -ENOMEM;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ba36013..841c2ab 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2758,7 +2758,11 @@ static void free_event_rcu(struct rcu_head *head)
if (event->ns)
put_pid_ns(event->ns);
perf_event_free_filter(event);
+#ifdef CONFIG_KMEMCHECK
+ free_pages((unsigned long)event, get_order(sizeof(*event)));
+#else
kfree(event);
+#endif
}

static void ring_buffer_put(struct ring_buffer *rb);
@@ -5723,7 +5727,12 @@ perf_event_alloc(struct perf_event_attr *attr,
int cpu,
return ERR_PTR(-EINVAL);
}

+#ifdef CONFIG_KMEMCHECK
+ event = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(sizeof(*event)));
+#else
event = kzalloc(sizeof(*event), GFP_KERNEL);
+#endif
if (!event)
return ERR_PTR(-ENOMEM);

@@ -5810,7 +5819,11 @@ done:
if (err) {
if (event->ns)
put_pid_ns(event->ns);
+#ifdef CONFIG_KMEMCHECK
+ free_pages((unsigned long)event, get_order(sizeof(*event)));
+#else
kfree(event);
+#endif
return ERR_PTR(err);
}

--
1.7.5.4

2012-02-20 11:01:07

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

On Mon, 2012-02-20 at 14:01 +0800, Li Zhong wrote:
> If CONFIG_KMEMCHECK is enabled, there might be page faults in nmi if the
> pages are marked as not present by kmemcheck, like following:
>
> [ 4.535803] WARNING: at arch/x86/mm/kmemcheck/kmemcheck.c:634 kmemcheck_fault+0xb9/0xd0()
> [ 4.633429] Hardware name: System x3650 M3 -[7945AC1]-
> [ 4.694710] Modules linked in:
> [ 4.731105] Pid: 1, comm: swapper/0 Not tainted 3.3.0-rc3 #15
> [ 4.799654] Call Trace:
> [ 4.828751] <NMI> [<ffffffff81042eca>] warn_slowpath_common+0x7a/0xb0
> [ 4.907713] [<ffffffff81042f15>] warn_slowpath_null+0x15/0x20
> [ 4.977301] [<ffffffff8103ce89>] kmemcheck_fault+0xb9/0xd0
> [ 5.043778] [<ffffffff81551ba6>] do_page_fault+0x406/0x550
> [ 5.110252] [<ffffffff8154e235>] page_fault+0x25/0x30
> [ 5.171535] [<ffffffff8154f005>] ? nmi_handle.clone.1+0x75/0xc0
> [ 5.243202] [<ffffffff8154efcf>] ? nmi_handle.clone.1+0x3f/0xc0
> [ 5.314867] [<ffffffff8154ef90>] ? __die+0xf0/0xf0
> [ 5.373038] [<ffffffff8154f15f>] do_nmi+0x10f/0x360
> [ 5.432243] [<ffffffff8154e5cd>] restart_nmi+0x1a/0x1e
> [ 5.494565] [<ffffffff8154e210>] ? general_protection+0x30/0x30
> [ 5.566234] [<ffffffff8154e210>] ? general_protection+0x30/0x30
> [ 5.637898] [<ffffffff8154e210>] ? general_protection+0x30/0x30
> [ 5.709566] <<EOE>> [<ffffffff8126d814>] ? rb_insert_color+0xa4/0x150
> [ 5.788526] [<ffffffff8119d17b>] sysfs_link_sibling+0x8b/0x110
> [ 5.859155] [<ffffffff8119dff1>] __sysfs_add_one+0xc1/0x100
> [ 5.926666] [<ffffffff8119e056>] sysfs_add_one+0x26/0xd0
> [ 5.991065] [<ffffffff8119cdf4>] sysfs_add_file_mode+0xc4/0x100
> [ 6.062731] [<ffffffff8119fc41>] internal_create_group+0xc1/0x1a0
> [ 6.136473] [<ffffffff8119fd4e>] sysfs_create_group+0xe/0x10
> [ 6.205026] [<ffffffff81351c1a>] dpm_sysfs_add+0x2a/0xd0
> [ 6.269425] [<ffffffff81349bf5>] device_add+0x5e5/0x730
> [ 6.332783] [<ffffffff81349d59>] device_register+0x19/0x20
> [ 6.399260] [<ffffffff8135b6b8>] add_memory_section+0x158/0x1e0
> [ 6.470927] [<ffffffff81ca757e>] memory_dev_init+0x75/0x108
> [ 6.538439] [<ffffffff81ca73a9>] driver_init+0x31/0x33
> [ 6.600762] [<ffffffff81c72c68>] kernel_init+0xcc/0x169
> [ 6.664121] [<ffffffff81555e64>] kernel_thread_helper+0x4/0x10
> [ 6.734749] [<ffffffff81c72b9c>] ? start_kernel+0x3ab/0x3ab
> [ 6.802261] [<ffffffff81555e60>] ? gs_change+0x13/0x13
> [ 6.864585] ---[ end trace a7919e7f17c0a725 ]---
>
> These two patches tries to fix some of the problems by avoiding using the
> non-present pages.


Hell no, these are some of the ugliest patches I've seen in a while. Not
to mention that their changelogs are utter crap since they don't even
explain why they're doing what they're doing.

2012-02-21 01:42:52

by Li Zhong

[permalink] [raw]
Subject: Re: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

On Mon, 2012-02-20 at 12:00 +0100, Peter Zijlstra wrote:
> On Mon, 2012-02-20 at 14:01 +0800, Li Zhong wrote:
> > If CONFIG_KMEMCHECK is enabled, there might be page faults in nmi if the
> > pages are marked as not present by kmemcheck, like following:
> >
> > [ 4.535803] WARNING: at arch/x86/mm/kmemcheck/kmemcheck.c:634 kmemcheck_fault+0xb9/0xd0()
> > [ 4.633429] Hardware name: System x3650 M3 -[7945AC1]-
> > [ 4.694710] Modules linked in:
> > [ 4.731105] Pid: 1, comm: swapper/0 Not tainted 3.3.0-rc3 #15
> > [ 4.799654] Call Trace:
> > [ 4.828751] <NMI> [<ffffffff81042eca>] warn_slowpath_common+0x7a/0xb0
> > [ 4.907713] [<ffffffff81042f15>] warn_slowpath_null+0x15/0x20
> > [ 4.977301] [<ffffffff8103ce89>] kmemcheck_fault+0xb9/0xd0
> > [ 5.043778] [<ffffffff81551ba6>] do_page_fault+0x406/0x550
> > [ 5.110252] [<ffffffff8154e235>] page_fault+0x25/0x30
> > [ 5.171535] [<ffffffff8154f005>] ? nmi_handle.clone.1+0x75/0xc0
> > [ 5.243202] [<ffffffff8154efcf>] ? nmi_handle.clone.1+0x3f/0xc0
> > [ 5.314867] [<ffffffff8154ef90>] ? __die+0xf0/0xf0
> > [ 5.373038] [<ffffffff8154f15f>] do_nmi+0x10f/0x360
> > [ 5.432243] [<ffffffff8154e5cd>] restart_nmi+0x1a/0x1e
> > [ 5.494565] [<ffffffff8154e210>] ? general_protection+0x30/0x30
> > [ 5.566234] [<ffffffff8154e210>] ? general_protection+0x30/0x30
> > [ 5.637898] [<ffffffff8154e210>] ? general_protection+0x30/0x30
> > [ 5.709566] <<EOE>> [<ffffffff8126d814>] ? rb_insert_color+0xa4/0x150
> > [ 5.788526] [<ffffffff8119d17b>] sysfs_link_sibling+0x8b/0x110
> > [ 5.859155] [<ffffffff8119dff1>] __sysfs_add_one+0xc1/0x100
> > [ 5.926666] [<ffffffff8119e056>] sysfs_add_one+0x26/0xd0
> > [ 5.991065] [<ffffffff8119cdf4>] sysfs_add_file_mode+0xc4/0x100
> > [ 6.062731] [<ffffffff8119fc41>] internal_create_group+0xc1/0x1a0
> > [ 6.136473] [<ffffffff8119fd4e>] sysfs_create_group+0xe/0x10
> > [ 6.205026] [<ffffffff81351c1a>] dpm_sysfs_add+0x2a/0xd0
> > [ 6.269425] [<ffffffff81349bf5>] device_add+0x5e5/0x730
> > [ 6.332783] [<ffffffff81349d59>] device_register+0x19/0x20
> > [ 6.399260] [<ffffffff8135b6b8>] add_memory_section+0x158/0x1e0
> > [ 6.470927] [<ffffffff81ca757e>] memory_dev_init+0x75/0x108
> > [ 6.538439] [<ffffffff81ca73a9>] driver_init+0x31/0x33
> > [ 6.600762] [<ffffffff81c72c68>] kernel_init+0xcc/0x169
> > [ 6.664121] [<ffffffff81555e64>] kernel_thread_helper+0x4/0x10
> > [ 6.734749] [<ffffffff81c72b9c>] ? start_kernel+0x3ab/0x3ab
> > [ 6.802261] [<ffffffff81555e60>] ? gs_change+0x13/0x13
> > [ 6.864585] ---[ end trace a7919e7f17c0a725 ]---
> >
> > These two patches tries to fix some of the problems by avoiding using the
> > non-present pages.
>
>
> Hell no, these are some of the ugliest patches I've seen in a while. Not
> to mention that their changelogs are utter crap since they don't even
> explain why they're doing what they're doing.
>
Hi Peter,

I agree that the fix is ugly. I'm willing to change if there are some
better ways.

The problem here is:
1. It seems x86 doesn't allow page faults in nmi, and there are checks
in the code, like WARN_ON_ONCE(in_nmi()).

2. If CONFIG_KMEMCHECK is enabled, the pages allocated through slab will
be marked as non-present, to capture uninitialized memory access. More
information in Documentation/kmemcheck.txt .

3. From the log, there are some memories accessed in nmi, which are in
pages marked as non-present by kmemcheck, as they are allocated by
something like kmalloc().

Thanks,
Zhong

2012-02-21 10:17:41

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

On Tue, 2012-02-21 at 09:42 +0800, Li Zhong wrote:

> > Hell no, these are some of the ugliest patches I've seen in a while. Not
> > to mention that their changelogs are utter crap since they don't even
> > explain why they're doing what they're doing.
> >
> Hi Peter,
>
> I agree that the fix is ugly. I'm willing to change if there are some
> better ways.

There are always better ways..

> The problem here is:
> 1. It seems x86 doesn't allow page faults in nmi, and there are checks
> in the code, like WARN_ON_ONCE(in_nmi()).

I bet that's not x86 only..

> 2. If CONFIG_KMEMCHECK is enabled, the pages allocated through slab will
> be marked as non-present, to capture uninitialized memory access. More
> information in Documentation/kmemcheck.txt .

So then kmemcheck is buggy, since the nmiaction structure is initialized
in register_nmi_handler(), so it should most definitely not be marked
non-present.

> 3. From the log, there are some memories accessed in nmi, which are in
> pages marked as non-present by kmemcheck, as they are allocated by
> something like kmalloc().

So figure out why and fix that instead of writing ugly ass patches that
seemingly work around the problem without actually thinking about it.

2012-02-23 10:08:37

by Li Zhong

[permalink] [raw]
Subject: Re: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

On Tue, 2012-02-21 at 11:17 +0100, Peter Zijlstra wrote:
> On Tue, 2012-02-21 at 09:42 +0800, Li Zhong wrote:
>
> > > Hell no, these are some of the ugliest patches I've seen in a while. Not
> > > to mention that their changelogs are utter crap since they don't even
> > > explain why they're doing what they're doing.
> > >
> > Hi Peter,
> >
> > I agree that the fix is ugly. I'm willing to change if there are some
> > better ways.
>
> There are always better ways..

Hi Peter,

I will think further about it, and would appreciate it if you could give
some good ideas.

>
> > The problem here is:
> > 1. It seems x86 doesn't allow page faults in nmi, and there are checks
> > in the code, like WARN_ON_ONCE(in_nmi()).
>
> I bet that's not x86 only..

Maybe, I found this problem on x86, didn't check other archs.

However, from Documentation/kmemcheck.txt, seems kmemcheck only supports
x86.

>
> > 2. If CONFIG_KMEMCHECK is enabled, the pages allocated through slab will
> > be marked as non-present, to capture uninitialized memory access. More
> > information in Documentation/kmemcheck.txt .
>
> So then kmemcheck is buggy, since the nmiaction structure is initialized
> in register_nmi_handler(), so it should most definitely not be marked
> non-present.
>

I'm not sure whether I understand it correctly. Do you mean that
nmiaction is initialized in register_nmi_handler(), which indicates it
will be used in nmi, so it shouldn't be marked non-present?

But for kmemcheck, why need it know the information that page fault is
not allowed in nmi?

Or maybe I misunderstand your point here?

> > 3. From the log, there are some memories accessed in nmi, which are in
> > pages marked as non-present by kmemcheck, as they are allocated by
> > something like kmalloc().
>
> So figure out why and fix that instead of writing ugly ass patches that
> seemingly work around the problem without actually thinking about it.
>

I think the reason is that kmalloc ( or kzalloc ... ) uses malloc_sizes
slab caches to allocate memory. The malloc_sizes slab caches is set up
without SLAB_NOTRACK flag, then kmemcheck marks the pages non-present to
do its check in page fault handling code. I think we shouldn't disable
kmemechek for the general malloc_sizes caches.

Thanks,
Zhong



2012-02-27 10:59:08

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

On Thu, 2012-02-23 at 17:53 +0800, Li Zhong wrote:

> I will think further about it, and would appreciate it if you could give
> some good ideas.

*sigh*.. or you could do your own damn work..

> > > 2. If CONFIG_KMEMCHECK is enabled, the pages allocated through slab will
> > > be marked as non-present, to capture uninitialized memory access. More
> > > information in Documentation/kmemcheck.txt .
> >
> > So then kmemcheck is buggy, since the nmiaction structure is initialized
> > in register_nmi_handler(), so it should most definitely not be marked
> > non-present.
> >
>
> I'm not sure whether I understand it correctly. Do you mean that
> nmiaction is initialized in register_nmi_handler(), which indicates it
> will be used in nmi, so it shouldn't be marked non-present?

No, you said that it marks memory non-present to detect uninitialized
stuff, but since it is initialized, it shouldn't then be non-present,
right?

> But for kmemcheck, why need it know the information that page fault is
> not allowed in nmi?

Uh, what?

> > > 3. From the log, there are some memories accessed in nmi, which are in
> > > pages marked as non-present by kmemcheck, as they are allocated by
> > > something like kmalloc().
> >
> > So figure out why and fix that instead of writing ugly ass patches that
> > seemingly work around the problem without actually thinking about it.
> >
>
> I think the reason is that kmalloc ( or kzalloc ... ) uses malloc_sizes
> slab caches to allocate memory. The malloc_sizes slab caches is set up
> without SLAB_NOTRACK flag, then kmemcheck marks the pages non-present to
> do its check in page fault handling code. I think we shouldn't disable
> kmemechek for the general malloc_sizes caches.

Nobody said you should.. there's plenty of solutions that aren't ass
backward stupid nor as ugly.

First you need to figure out why the page is marked non-present since
the data structure is initialized (I've got a fair idea why), then look
if you can tell kmemcheck not to be silly like that.

Alternatively you can change the nmi stuff to use static storage like
other notifiers (see notifier_block).

What you don't ever do is write alternative code paths that are never
ever used except for debugging, that is just asking for problems.

2012-02-28 02:45:58

by Li Zhong

[permalink] [raw]
Subject: Re: [PATCH 0/2 x86] fix some page faults in nmi if kmemcheck is enabled

On Mon, 2012-02-27 at 11:58 +0100, Peter Zijlstra wrote:
> On Thu, 2012-02-23 at 17:53 +0800, Li Zhong wrote:
>
> > I will think further about it, and would appreciate it if you could give
> > some good ideas.
>
> *sigh*.. or you could do your own damn work..

I'm still doing the work ...

>
> > > > 2. If CONFIG_KMEMCHECK is enabled, the pages allocated through slab will
> > > > be marked as non-present, to capture uninitialized memory access. More
> > > > information in Documentation/kmemcheck.txt .
> > >
> > > So then kmemcheck is buggy, since the nmiaction structure is initialized
> > > in register_nmi_handler(), so it should most definitely not be marked
> > > non-present.
> > >
> >
> > I'm not sure whether I understand it correctly. Do you mean that
> > nmiaction is initialized in register_nmi_handler(), which indicates it
> > will be used in nmi, so it shouldn't be marked non-present?
>
> No, you said that it marks memory non-present to detect uninitialized
> stuff, but since it is initialized, it shouldn't then be non-present,
> right?

>From my understanding of kmemcheck, the checking is based on the
non-present page. So while handling page fault, if the memory hasn't
been written before read, kmemcheck knows that it is uninitialized.

I think it is used to find code errors, so it need mark all non-present,
to check if there are any access to uninitialized memory.

>
> > But for kmemcheck, why need it know the information that page fault is
> > not allowed in nmi?
>
> Uh, what?

Please ignore it, as I misunderstood your point previously.

> > > > 3. From the log, there are some memories accessed in nmi, which are in
> > > > pages marked as non-present by kmemcheck, as they are allocated by
> > > > something like kmalloc().
> > >
> > > So figure out why and fix that instead of writing ugly ass patches that
> > > seemingly work around the problem without actually thinking about it.
> > >
> >
> > I think the reason is that kmalloc ( or kzalloc ... ) uses malloc_sizes
> > slab caches to allocate memory. The malloc_sizes slab caches is set up
> > without SLAB_NOTRACK flag, then kmemcheck marks the pages non-present to
> > do its check in page fault handling code. I think we shouldn't disable
> > kmemechek for the general malloc_sizes caches.
>
> Nobody said you should.. there's plenty of solutions that aren't ass
> backward stupid nor as ugly.
>
> First you need to figure out why the page is marked non-present since
> the data structure is initialized (I've got a fair idea why), then look
> if you can tell kmemcheck not to be silly like that.
>
> Alternatively you can change the nmi stuff to use static storage like
> other notifiers (see notifier_block).

OK, I will try to update the nmi one using this way.

But I think it couldn't be used to the perf stuff.
For perf, maybe it's good for kmemcheck to have some flag like
__GFP_NO_PAGE_FAULT? Currently it seems only has flags like
__GFP_NOTRACK, which will still mark page non-present.

>
> What you don't ever do is write alternative code paths that are never
> ever used except for debugging, that is just asking for problems.
>

Got it, thanks for the reminder! Previously, I thought the biggest
problem was wasting memory ...

Thanks,
Zhong