Object scan areas are an optimisation aimed to decrease the false
positives and slightly improve the scanning time of large objects known
to only have a few specific pointers. If a struct scan_area fails to
allocate, kmemleak can still function normally by scanning the full
object.
Introduce an OBJECT_FULL_SCAN flag and mark objects as such when
scan_area allocation fails.
Signed-off-by: Catalin Marinas <[email protected]>
---
mm/kmemleak.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f6e602918dac..5ba7fad00fda 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -168,6 +168,8 @@ struct kmemleak_object {
#define OBJECT_REPORTED (1 << 1)
/* flag set to not scan the object */
#define OBJECT_NO_SCAN (1 << 2)
+/* flag set to fully scan the object when scan_area allocation failed */
+#define OBJECT_FULL_SCAN (1 << 3)
#define HEX_PREFIX " "
/* number of bytes to print per line; must be 16 or 32 */
@@ -773,12 +775,14 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
}
area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
- if (!area) {
- pr_warn("Cannot allocate a scan area\n");
- goto out;
- }
spin_lock_irqsave(&object->lock, flags);
+ if (!area) {
+ pr_warn_once("Cannot allocate a scan area, scanning the full object\n");
+ /* mark the object for full scan to avoid false positives */
+ object->flags |= OBJECT_FULL_SCAN;
+ goto out_unlock;
+ }
if (size == SIZE_MAX) {
size = object->pointer + object->size - ptr;
} else if (ptr + size > object->pointer + object->size) {
@@ -795,7 +799,6 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
hlist_add_head(&area->node, &object->area_list);
out_unlock:
spin_unlock_irqrestore(&object->lock, flags);
-out:
put_object(object);
}
@@ -1408,7 +1411,8 @@ static void scan_object(struct kmemleak_object *object)
if (!(object->flags & OBJECT_ALLOCATED))
/* already freed object */
goto out;
- if (hlist_empty(&object->area_list)) {
+ if (hlist_empty(&object->area_list) ||
+ object->flags & OBJECT_FULL_SCAN) {
void *start = (void *)object->pointer;
void *end = (void *)(object->pointer + object->size);
void *next;
On 13/08/2019 02:06, Catalin Marinas wrote:
> Object scan areas are an optimisation aimed to decrease the false
> positives and slightly improve the scanning time of large objects known
> to only have a few specific pointers. If a struct scan_area fails to
> allocate, kmemleak can still function normally by scanning the full
> object.
>
> Introduce an OBJECT_FULL_SCAN flag and mark objects as such when
> scan_area allocation fails.
I came across this one while bisecting sudden drop in throughput of a 100Gbit Mellanox CX4 ethernet card in a PPC POWER9
system, the speed dropped from 100Gbit to about 40Gbit. Bisect pointed at dba82d943177, this are the relevant config
options:
[fstn1-p1 kernel]$ grep KMEMLEAK ~/pbuild/kernel-le-4g/.config
CONFIG_HAVE_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=16000
# CONFIG_DEBUG_KMEMLEAK_TEST is not set
# CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
Setting CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE=400 or even 4000 (this is what KMEMLEAK_EARLY_LOG_SIZE is now in the master)
produces soft lockups on the recent upstream (sha1 a3c0e7b1fe1f):
[c000001fde64fb60] [c000000000c24ed4] _raw_write_unlock_irqrestore+0x54/0x70
[c000001fde64fb90] [c0000000004117e4] find_and_remove_object+0xa4/0xd0
[c000001fde64fbe0] [c000000000411c74] delete_object_full+0x24/0x50
[c000001fde64fc00] [c000000000411d28] __kmemleak_do_cleanup+0x88/0xd0
[c000001fde64fc40] [c00000000012a1a4] process_one_work+0x374/0x6a0
[c000001fde64fd20] [c00000000012a548] worker_thread+0x78/0x5a0
[c000001fde64fdb0] [c000000000135508] kthread+0x198/0x1a0
[c000001fde64fe20] [c00000000000b980] ret_from_kernel_thread+0x5c/0x7c
KMEMLEAK_EARLY_LOG_SIZE=8000 works but slow.
Interestingly KMEMLEAK_EARLY_LOG_SIZE=400 on dba82d943177 still worked and I saw my 100Gbit. Disabling KMEMLEAK also
fixes the speed (apparently).
Is that something expected? Thanks,
>
> Signed-off-by: Catalin Marinas <[email protected]>
> ---
> mm/kmemleak.c | 16 ++++++++++------
> 1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
> index f6e602918dac..5ba7fad00fda 100644
> --- a/mm/kmemleak.c
> +++ b/mm/kmemleak.c
> @@ -168,6 +168,8 @@ struct kmemleak_object {
> #define OBJECT_REPORTED (1 << 1)
> /* flag set to not scan the object */
> #define OBJECT_NO_SCAN (1 << 2)
> +/* flag set to fully scan the object when scan_area allocation failed */
> +#define OBJECT_FULL_SCAN (1 << 3)
>
> #define HEX_PREFIX " "
> /* number of bytes to print per line; must be 16 or 32 */
> @@ -773,12 +775,14 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
> }
>
> area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
> - if (!area) {
> - pr_warn("Cannot allocate a scan area\n");
> - goto out;
> - }
>
> spin_lock_irqsave(&object->lock, flags);
> + if (!area) {
> + pr_warn_once("Cannot allocate a scan area, scanning the full object\n");
> + /* mark the object for full scan to avoid false positives */
> + object->flags |= OBJECT_FULL_SCAN;
> + goto out_unlock;
> + }
> if (size == SIZE_MAX) {
> size = object->pointer + object->size - ptr;
> } else if (ptr + size > object->pointer + object->size) {
> @@ -795,7 +799,6 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
> hlist_add_head(&area->node, &object->area_list);
> out_unlock:
> spin_unlock_irqrestore(&object->lock, flags);
> -out:
> put_object(object);
> }
>
> @@ -1408,7 +1411,8 @@ static void scan_object(struct kmemleak_object *object)
> if (!(object->flags & OBJECT_ALLOCATED))
> /* already freed object */
> goto out;
> - if (hlist_empty(&object->area_list)) {
> + if (hlist_empty(&object->area_list) ||
> + object->flags & OBJECT_FULL_SCAN) {
> void *start = (void *)object->pointer;
> void *end = (void *)(object->pointer + object->size);
> void *next;
>
--
Alexey
> On Oct 3, 2019, at 2:13 AM, Alexey Kardashevskiy <[email protected]> wrote:
>
> I came across this one while bisecting sudden drop in throughput of a 100Gbit Mellanox CX4 ethernet card in a PPC POWER9
> system, the speed dropped from 100Gbit to about 40Gbit. Bisect pointed at dba82d943177, this are the relevant config
> options:
>
> [fstn1-p1 kernel]$ grep KMEMLEAK ~/pbuild/kernel-le-4g/.config
> CONFIG_HAVE_DEBUG_KMEMLEAK=y
> CONFIG_DEBUG_KMEMLEAK=y
> CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=16000
> # CONFIG_DEBUG_KMEMLEAK_TEST is not set
> # CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
> CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
>
> Setting CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE=400 or even 4000 (this is what KMEMLEAK_EARLY_LOG_SIZE is now in the master)
> produces soft lockups on the recent upstream (sha1 a3c0e7b1fe1f):
>
> [c000001fde64fb60] [c000000000c24ed4] _raw_write_unlock_irqrestore+0x54/0x70
> [c000001fde64fb90] [c0000000004117e4] find_and_remove_object+0xa4/0xd0
> [c000001fde64fbe0] [c000000000411c74] delete_object_full+0x24/0x50
> [c000001fde64fc00] [c000000000411d28] __kmemleak_do_cleanup+0x88/0xd0
> [c000001fde64fc40] [c00000000012a1a4] process_one_work+0x374/0x6a0
> [c000001fde64fd20] [c00000000012a548] worker_thread+0x78/0x5a0
> [c000001fde64fdb0] [c000000000135508] kthread+0x198/0x1a0
> [c000001fde64fe20] [c00000000000b980] ret_from_kernel_thread+0x5c/0x7c
>
> KMEMLEAK_EARLY_LOG_SIZE=8000 works but slow.
>
> Interestingly KMEMLEAK_EARLY_LOG_SIZE=400 on dba82d943177 still worked and I saw my 100Gbit. Disabling KMEMLEAK also
> fixes the speed (apparently).
>
> Is that something expected? Thanks,
It is expected that a debug option like this will make the system slower.
On Thu, Oct 03, 2019 at 04:13:07PM +1000, Alexey Kardashevskiy wrote:
> On 13/08/2019 02:06, Catalin Marinas wrote:
> > Object scan areas are an optimisation aimed to decrease the false
> > positives and slightly improve the scanning time of large objects known
> > to only have a few specific pointers. If a struct scan_area fails to
> > allocate, kmemleak can still function normally by scanning the full
> > object.
> >
> > Introduce an OBJECT_FULL_SCAN flag and mark objects as such when
> > scan_area allocation fails.
>
> I came across this one while bisecting sudden drop in throughput of a
> 100Gbit Mellanox CX4 ethernet card in a PPC POWER9 system, the speed
> dropped from 100Gbit to about 40Gbit. Bisect pointed at dba82d943177,
> this are the relevant config options:
>
> [fstn1-p1 kernel]$ grep KMEMLEAK ~/pbuild/kernel-le-4g/.config
> CONFIG_HAVE_DEBUG_KMEMLEAK=y
> CONFIG_DEBUG_KMEMLEAK=y
> CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=16000
> # CONFIG_DEBUG_KMEMLEAK_TEST is not set
> # CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
> CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
The throughput drop is probably caused caused by kmemleak slowing down
all memory allocations (including skb). So that's expected. You may get
similar drop with other debug options like lock proving, kasan.
> Setting CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE=400 or even 4000 (this is
> what KMEMLEAK_EARLY_LOG_SIZE is now in the master) produces soft
> lockups on the recent upstream (sha1 a3c0e7b1fe1f):
>
> [c000001fde64fb60] [c000000000c24ed4] _raw_write_unlock_irqrestore+0x54/0x70
> [c000001fde64fb90] [c0000000004117e4] find_and_remove_object+0xa4/0xd0
> [c000001fde64fbe0] [c000000000411c74] delete_object_full+0x24/0x50
> [c000001fde64fc00] [c000000000411d28] __kmemleak_do_cleanup+0x88/0xd0
> [c000001fde64fc40] [c00000000012a1a4] process_one_work+0x374/0x6a0
> [c000001fde64fd20] [c00000000012a548] worker_thread+0x78/0x5a0
> [c000001fde64fdb0] [c000000000135508] kthread+0x198/0x1a0
> [c000001fde64fe20] [c00000000000b980] ret_from_kernel_thread+0x5c/0x7c
That's the kmemleak disabling path. I don't have the full log but I
suspect by setting a small pool size, kmemleak failed to allocate memory
and went into disabling itself. The clean-up above tries to remove the
allocated metadata. It seems that it takes significant time on your
platform. Not sure how to avoid the soft lock-up but I wouldn't bother
too much about it, it's only triggered by a previous error condition
disabling kmemleak.
> KMEMLEAK_EARLY_LOG_SIZE=8000 works but slow.
>
> Interestingly KMEMLEAK_EARLY_LOG_SIZE=400 on dba82d943177 still worked
> and I saw my 100Gbit. Disabling KMEMLEAK also fixes the speed
> (apparently).
A small memory pool for kmemleak just disables it shortly after boot, so
it's no longer in the way and you get your throughput back.
> Is that something expected? Thanks,
Yes for the throughput. Not sure about the soft lock-up. Do you have the
full log around the lock-up?
--
Catalin
On 03/10/2019 18:41, Catalin Marinas wrote:
> On Thu, Oct 03, 2019 at 04:13:07PM +1000, Alexey Kardashevskiy wrote:
>> On 13/08/2019 02:06, Catalin Marinas wrote:
>>> Object scan areas are an optimisation aimed to decrease the false
>>> positives and slightly improve the scanning time of large objects known
>>> to only have a few specific pointers. If a struct scan_area fails to
>>> allocate, kmemleak can still function normally by scanning the full
>>> object.
>>>
>>> Introduce an OBJECT_FULL_SCAN flag and mark objects as such when
>>> scan_area allocation fails.
>>
>> I came across this one while bisecting sudden drop in throughput of a
>> 100Gbit Mellanox CX4 ethernet card in a PPC POWER9 system, the speed
>> dropped from 100Gbit to about 40Gbit. Bisect pointed at dba82d943177,
>> this are the relevant config options:
>>
>> [fstn1-p1 kernel]$ grep KMEMLEAK ~/pbuild/kernel-le-4g/.config
>> CONFIG_HAVE_DEBUG_KMEMLEAK=y
>> CONFIG_DEBUG_KMEMLEAK=y
>> CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=16000
>> # CONFIG_DEBUG_KMEMLEAK_TEST is not set
>> # CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
>> CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
>
> The throughput drop is probably caused caused by kmemleak slowing down
> all memory allocations (including skb). So that's expected. You may get
> similar drop with other debug options like lock proving, kasan.
I was not precise. I meant that before dba82d943177 kmemleak would work but would not slow network down (at least
100Gbit) and now it does which is downgrade so I was wondering if kmemleak just got so much better to justify this
change or there is a bug somewhere, so which one is it? Or "LOG_SIZE=400" never really worked? See my findings below though.
If it was always slow, it is expected indeed.
>
>> Setting CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE=400 or even 4000 (this is
>> what KMEMLEAK_EARLY_LOG_SIZE is now in the master) produces soft
>> lockups on the recent upstream (sha1 a3c0e7b1fe1f):
>>
>> [c000001fde64fb60] [c000000000c24ed4] _raw_write_unlock_irqrestore+0x54/0x70
>> [c000001fde64fb90] [c0000000004117e4] find_and_remove_object+0xa4/0xd0
>> [c000001fde64fbe0] [c000000000411c74] delete_object_full+0x24/0x50
>> [c000001fde64fc00] [c000000000411d28] __kmemleak_do_cleanup+0x88/0xd0
>> [c000001fde64fc40] [c00000000012a1a4] process_one_work+0x374/0x6a0
>> [c000001fde64fd20] [c00000000012a548] worker_thread+0x78/0x5a0
>> [c000001fde64fdb0] [c000000000135508] kthread+0x198/0x1a0
>> [c000001fde64fe20] [c00000000000b980] ret_from_kernel_thread+0x5c/0x7c
>
> That's the kmemleak disabling path. I don't have the full log but I
> suspect by setting a small pool size, kmemleak failed to allocate memory
> and went into disabling itself. The clean-up above tries to remove the
> allocated metadata. It seems that it takes significant time on your
> platform. Not sure how to avoid the soft lock-up but I wouldn't bother
> too much about it, it's only triggered by a previous error condition
> disabling kmemleak.
>
>> KMEMLEAK_EARLY_LOG_SIZE=8000 works but slow.
>>
>> Interestingly KMEMLEAK_EARLY_LOG_SIZE=400 on dba82d943177 still worked
>> and I saw my 100Gbit. Disabling KMEMLEAK also fixes the speed
>> (apparently).
>
> A small memory pool for kmemleak just disables it shortly after boot, so
> it's no longer in the way and you get your throughput back.
>
>> Is that something expected? Thanks,
>
> Yes for the throughput. Not sure about the soft lock-up. Do you have the
> full log around the lock-up?
I was going to post one but then I received "kmemleak: Do not corrupt the object_list during clean-up" which fixed
lockups and took throughput back to normal, I'll reply there too. Thanks,
--
Alexey
On Sat, Oct 05, 2019 at 01:08:43PM +1000, Alexey Kardashevskiy wrote:
> On 03/10/2019 18:41, Catalin Marinas wrote:
> > On Thu, Oct 03, 2019 at 04:13:07PM +1000, Alexey Kardashevskiy wrote:
> >> On 13/08/2019 02:06, Catalin Marinas wrote:
> >>> Object scan areas are an optimisation aimed to decrease the false
> >>> positives and slightly improve the scanning time of large objects known
> >>> to only have a few specific pointers. If a struct scan_area fails to
> >>> allocate, kmemleak can still function normally by scanning the full
> >>> object.
> >>>
> >>> Introduce an OBJECT_FULL_SCAN flag and mark objects as such when
> >>> scan_area allocation fails.
> >>
> >> I came across this one while bisecting sudden drop in throughput of a
> >> 100Gbit Mellanox CX4 ethernet card in a PPC POWER9 system, the speed
> >> dropped from 100Gbit to about 40Gbit. Bisect pointed at dba82d943177,
> >> this are the relevant config options:
> >>
> >> [fstn1-p1 kernel]$ grep KMEMLEAK ~/pbuild/kernel-le-4g/.config
> >> CONFIG_HAVE_DEBUG_KMEMLEAK=y
> >> CONFIG_DEBUG_KMEMLEAK=y
> >> CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=16000
> >> # CONFIG_DEBUG_KMEMLEAK_TEST is not set
> >> # CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
> >> CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
> >
> > The throughput drop is probably caused caused by kmemleak slowing down
> > all memory allocations (including skb). So that's expected. You may get
> > similar drop with other debug options like lock proving, kasan.
>
> I was not precise. I meant that before dba82d943177 kmemleak would
> work but would not slow network down (at least 100Gbit) and now it
> does which is downgrade so I was wondering if kmemleak just got so
> much better to justify this change or there is a bug somewhere, so
> which one is it? Or "LOG_SIZE=400" never really worked?
I suspect LOG_SIZE=400 never worked on your system (you can check the
log for any kmemleak disabled messages).
Thanks for testing the patch.
--
Catalin