Hi,
In the last few days, I went through a number of false positives reported
by kmemleak, and it turns out some of them were caused by not tracking
alloc_bootmem* calls. Rather than adding more and more kmemleak
annotations throughout the kernel, I decided to add support for tracking
all the alloc_bootmem* and free_bootmem calls.
The latter may not have a corresponding alloc_bootmem* call, or it may
free only part of a block. I changed kmemleak to support this usage.
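For example, a usage pattern like the following (sizes made up for
illustration) is now tracked correctly:

    /* hypothetical example, not from the kernel tree */
    void *p = alloc_bootmem(16384);         /* one 16K object recorded */
    free_bootmem(__pa(p) + 4096, 4096);     /* free 4K from the middle */
    /*
     * kmemleak drops the freed range and keeps two objects covering
     * [p, p + 4K) and [p + 8K, p + 16K).
     */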
Thanks for your feedback.
Catalin Marinas (3):
kmemleak: Remove alloc_bootmem annotations introduced in the past
kmemleak: Add callbacks to the bootmem allocator
kmemleak: Allow partial freeing of memory blocks
include/linux/kmemleak.h | 4 +++
kernel/pid.c | 7 ------
mm/bootmem.c | 36 ++++++++++++++++++++++++------
mm/kmemleak.c | 55 ++++++++++++++++++++++++++++++++++++++++++----
mm/page_alloc.c | 14 +++---------
5 files changed, 86 insertions(+), 30 deletions(-)
--
Catalin
Functions like free_bootmem() are allowed to free only part of a memory
block. This patch adds support for this via the kmemleak_free_part()
callback which removes the original object and creates one or two
additional objects as a result of the memory block split.
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pekka Enberg <[email protected]>
---
include/linux/kmemleak.h | 4 +++
mm/kmemleak.c | 55 ++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 54 insertions(+), 5 deletions(-)
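As an illustration of the split (addresses invented for the example):
suppose an object covers [0x1000, 0x1400) and
kmemleak_free_part((void *)0x1100, 0x100) is called. Then:

    start = 0x1000, end = 0x1400, ptr = 0x1100, size = 0x100

    ptr > start         ->  create_object(0x1000, 0x100, ...);
    ptr + size < end    ->  create_object(0x1200, 0x200, ...);

i.e. the freed range [0x1100, 0x1200) is dropped and the two remaining
parts become separate kmemleak objects.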
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 7796aed..6a63807 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -27,6 +27,7 @@ extern void kmemleak_init(void);
extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
gfp_t gfp);
extern void kmemleak_free(const void *ptr);
+extern void kmemleak_free_part(const void *ptr, size_t size);
extern void kmemleak_padding(const void *ptr, unsigned long offset,
size_t size);
extern void kmemleak_not_leak(const void *ptr);
@@ -71,6 +72,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
static inline void kmemleak_free(const void *ptr)
{
}
+static inline void kmemleak_free_part(const void *ptr, size_t size)
+{
+}
static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
{
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5f7d8ae..57f8081 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -210,6 +210,7 @@ static DEFINE_MUTEX(scan_mutex);
enum {
KMEMLEAK_ALLOC,
KMEMLEAK_FREE,
+ KMEMLEAK_FREE_PART,
KMEMLEAK_NOT_LEAK,
KMEMLEAK_IGNORE,
KMEMLEAK_SCAN_AREA,
@@ -522,15 +523,20 @@ out:
/*
* Remove the metadata (struct kmemleak_object) for a memory block from the
- * object_list and object_tree_root and decrement its use_count.
+ * object_list and object_tree_root and decrement its use_count. If the memory
+ * block is partially freed (size > 0), the function may create additional
+ * metadata for the remaining parts of the block.
*/
-static void delete_object(unsigned long ptr)
+static void delete_object(unsigned long ptr, size_t size)
{
unsigned long flags;
struct kmemleak_object *object;
+ unsigned long start, end;
+ unsigned int min_count;
+ gfp_t gfp_flags;
write_lock_irqsave(&kmemleak_lock, flags);
- object = lookup_object(ptr, 0);
+ object = lookup_object(ptr, 1);
if (!object) {
#ifdef DEBUG
kmemleak_warn("Freeing unknown object at 0x%08lx\n",
@@ -552,8 +558,29 @@ static void delete_object(unsigned long ptr)
*/
spin_lock_irqsave(&object->lock, flags);
object->flags &= ~OBJECT_ALLOCATED;
+ start = object->pointer;
+ end = object->pointer + object->size;
+ min_count = object->min_count;
spin_unlock_irqrestore(&object->lock, flags);
put_object(object);
+
+ if (!size)
+ return;
+
+ /*
+ * Partial freeing. Just create one or two objects that may result
+ * from the memory block split.
+ */
+ if (in_atomic())
+ gfp_flags = GFP_ATOMIC;
+ else
+ gfp_flags = GFP_KERNEL;
+
+ if (ptr > start)
+ create_object(start, ptr - start, min_count, gfp_flags);
+ if (ptr + size < end)
+ create_object(ptr + size, end - ptr - size, min_count,
+ gfp_flags);
}
/*
@@ -720,13 +747,28 @@ void kmemleak_free(const void *ptr)
pr_debug("%s(0x%p)\n", __func__, ptr);
if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
- delete_object((unsigned long)ptr);
+ delete_object((unsigned long)ptr, 0);
else if (atomic_read(&kmemleak_early_log))
log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free);
/*
+ * Partial memory freeing function callback. This function is usually called
+ * from bootmem allocator when (part of) a memory block is freed.
+ */
+void kmemleak_free_part(const void *ptr, size_t size)
+{
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+ if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+ delete_object((unsigned long)ptr, size);
+ else if (atomic_read(&kmemleak_early_log))
+ log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free_part);
+
+/*
* Mark an already allocated memory block as a false positive. This will cause
* the block to no longer be reported as leak and always be scanned.
*/
@@ -1345,7 +1387,7 @@ static int kmemleak_cleanup_thread(void *arg)
rcu_read_lock();
list_for_each_entry_rcu(object, &object_list, object_list)
- delete_object(object->pointer);
+ delete_object(object->pointer, 0);
rcu_read_unlock();
mutex_unlock(&scan_mutex);
@@ -1440,6 +1482,9 @@ void __init kmemleak_init(void)
case KMEMLEAK_FREE:
kmemleak_free(log->ptr);
break;
+ case KMEMLEAK_FREE_PART:
+ kmemleak_free_part(log->ptr, log->size);
+ break;
case KMEMLEAK_NOT_LEAK:
kmemleak_not_leak(log->ptr);
break;
This patch adds kmemleak_alloc/free callbacks to the bootmem allocator.
This would allow scanning of such blocks and help avoid a whole class
of false positives and the need for more kmemleak annotations.
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pekka Enberg <[email protected]>
---
mm/bootmem.c | 36 +++++++++++++++++++++++++++++-------
1 files changed, 29 insertions(+), 7 deletions(-)
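For instance, call sites previously needed manual annotations like this
one (taken from kernel/pid.c; it is removed again by the last patch in
this series):

    pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
    kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
                   GFP_KERNEL);

With the callbacks in the allocator itself, the alloc_bootmem() call
alone is enough.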
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d2a9ce9..18858ad 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -335,6 +335,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
{
unsigned long start, end;
+ kmemleak_free(__va(physaddr));
+
start = PFN_UP(physaddr);
end = PFN_DOWN(physaddr + size);
@@ -354,6 +356,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
{
unsigned long start, end;
+ kmemleak_free_part(__va(addr), size);
+
start = PFN_UP(addr);
end = PFN_DOWN(addr + size);
@@ -597,7 +601,9 @@ restart:
void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
unsigned long goal)
{
- return ___alloc_bootmem_nopanic(size, align, goal, 0);
+ void *ptr = ___alloc_bootmem_nopanic(size, align, goal, 0);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
+ return ptr;
}
static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
@@ -631,7 +637,9 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
void * __init __alloc_bootmem(unsigned long size, unsigned long align,
unsigned long goal)
{
- return ___alloc_bootmem(size, align, goal, 0);
+ void *ptr = ___alloc_bootmem(size, align, goal, 0);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
+ return ptr;
}
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
@@ -669,10 +677,14 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
+ return ptr;
}
#ifdef CONFIG_SPARSEMEM
@@ -707,14 +719,18 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
if (ptr)
return ptr;
ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
if (ptr)
return ptr;
- return __alloc_bootmem_nopanic(size, align, goal);
+ ptr = __alloc_bootmem_nopanic(size, align, goal);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
+ return ptr;
}
#ifndef ARCH_LOW_ADDRESS_LIMIT
@@ -737,7 +753,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
unsigned long goal)
{
- return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
+ void *ptr = ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
+ return ptr;
}
/**
@@ -758,9 +776,13 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
- goal, ARCH_LOW_ADDRESS_LIMIT);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
+ return ptr;
}
kmemleak_alloc() calls were added in some places where alloc_bootmem was
called. Since kmemleak now tracks bootmem allocations, these explicit
calls should be removed.
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pekka Enberg <[email protected]>
---
kernel/pid.c | 7 -------
mm/page_alloc.c | 14 +++-----------
2 files changed, 3 insertions(+), 18 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c
index 5fa1db4..31310b5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,7 +36,6 @@
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
-#include <linux/kmemleak.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -513,12 +512,6 @@ void __init pidhash_init(void)
pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
if (!pid_hash)
panic("Could not alloc pidhash!\n");
- /*
- * pid_hash contains references to allocated struct pid objects and it
- * must be scanned by kmemleak to avoid false positives.
- */
- kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
- GFP_KERNEL);
for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0f2cdf..202ef6b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4745,8 +4745,10 @@ void *__init alloc_large_system_hash(const char *tablename,
* some pages at the end of hash table which
* alloc_pages_exact() automatically does
*/
- if (get_order(size) < MAX_ORDER)
+ if (get_order(size) < MAX_ORDER) {
table = alloc_pages_exact(size, GFP_ATOMIC);
+ kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+ }
}
} while (!table && size > PAGE_SIZE && --log2qty);
@@ -4764,16 +4766,6 @@ void *__init alloc_large_system_hash(const char *tablename,
if (_hash_mask)
*_hash_mask = (1 << log2qty) - 1;
- /*
- * If hashdist is set, the table allocation is done with __vmalloc()
- * which invokes the kmemleak_alloc() callback. This function may also
- * be called before the slab and kmemleak are initialised when
- * kmemleak simply buffers the request to be executed later
- * (GFP_ATOMIC flag ignored in this case).
- */
- if (!hashdist)
- kmemleak_alloc(table, size, 1, GFP_ATOMIC);
-
return table;
}
On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> This patch adds kmemleak_alloc/free callbacks to the bootmem allocator.
> This would allow scanning of such blocks and help avoid a whole class
> of false positives and the need for more kmemleak annotations.
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Pekka Enberg <[email protected]>
> ---
> mm/bootmem.c | 36 +++++++++++++++++++++++++++++-------
> 1 files changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index d2a9ce9..18858ad 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -335,6 +335,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
> {
> unsigned long start, end;
>
> + kmemleak_free(__va(physaddr));
This should actually be
+ kmemleak_free_part(__va(physaddr), size);
--
Catalin
On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> This patch adds kmemleak_alloc/free callbacks to the bootmem allocator.
> This would allow scanning of such blocks and help avoid a whole class
> of false positives and the need for more kmemleak annotations.
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Pekka Enberg <[email protected]>
Looks good to me!
Acked-by: Pekka Enberg <[email protected]>
But let's cc Johannes on this too.
> ---
> mm/bootmem.c | 36 +++++++++++++++++++++++++++++-------
> 1 files changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index d2a9ce9..18858ad 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -335,6 +335,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
> {
> unsigned long start, end;
>
> + kmemleak_free(__va(physaddr));
> +
> start = PFN_UP(physaddr);
> end = PFN_DOWN(physaddr + size);
>
> @@ -354,6 +356,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
> {
> unsigned long start, end;
>
> + kmemleak_free_part(__va(addr), size);
> +
> start = PFN_UP(addr);
> end = PFN_DOWN(addr + size);
>
> @@ -597,7 +601,9 @@ restart:
> void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
> unsigned long goal)
> {
> - return ___alloc_bootmem_nopanic(size, align, goal, 0);
> + void *ptr = ___alloc_bootmem_nopanic(size, align, goal, 0);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> + return ptr;
> }
>
> static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
> @@ -631,7 +637,9 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
> void * __init __alloc_bootmem(unsigned long size, unsigned long align,
> unsigned long goal)
> {
> - return ___alloc_bootmem(size, align, goal, 0);
> + void *ptr = ___alloc_bootmem(size, align, goal, 0);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> + return ptr;
> }
>
> static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
> @@ -669,10 +677,14 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
> void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
> unsigned long align, unsigned long goal)
> {
> + void *ptr;
> +
> if (WARN_ON_ONCE(slab_is_available()))
> return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
>
> - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
> + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> + return ptr;
> }
>
> #ifdef CONFIG_SPARSEMEM
> @@ -707,14 +719,18 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
> return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
>
> ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> if (ptr)
> return ptr;
>
> ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> if (ptr)
> return ptr;
>
> - return __alloc_bootmem_nopanic(size, align, goal);
> + ptr = __alloc_bootmem_nopanic(size, align, goal);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> + return ptr;
> }
>
> #ifndef ARCH_LOW_ADDRESS_LIMIT
> @@ -737,7 +753,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
> void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
> unsigned long goal)
> {
> - return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
> + void *ptr = ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> + return ptr;
> }
>
> /**
> @@ -758,9 +776,13 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
> void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
> unsigned long align, unsigned long goal)
> {
> + void *ptr;
> +
> if (WARN_ON_ONCE(slab_is_available()))
> return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
>
> - return ___alloc_bootmem_node(pgdat->bdata, size, align,
> - goal, ARCH_LOW_ADDRESS_LIMIT);
> + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
> + goal, ARCH_LOW_ADDRESS_LIMIT);
> + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> + return ptr;
> }
>
On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> @@ -552,8 +558,29 @@ static void delete_object(unsigned long ptr)
> */
> spin_lock_irqsave(&object->lock, flags);
> object->flags &= ~OBJECT_ALLOCATED;
> + start = object->pointer;
> + end = object->pointer + object->size;
> + min_count = object->min_count;
> spin_unlock_irqrestore(&object->lock, flags);
> put_object(object);
> +
> + if (!size)
> + return;
> +
> + /*
> + * Partial freeing. Just create one or two objects that may result
> + * from the memory block split.
> + */
> + if (in_atomic())
> + gfp_flags = GFP_ATOMIC;
> + else
> + gfp_flags = GFP_KERNEL;
Are you sure we can do this? There's a big fat comment on top of
in_atomic() that suggests this is not safe. Why do we need to create the
object here anyway and not in the _alloc_ paths where gfp flags are
explicitly passed?
> +
> + if (ptr > start)
> + create_object(start, ptr - start, min_count, gfp_flags);
> + if (ptr + size < end)
> + create_object(ptr + size, end - ptr - size, min_count,
> + gfp_flags);
> }
>
> /*
On Mon, 2009-07-06 at 11:52 +0100, Catalin Marinas wrote:
> kmemleak_alloc() calls were added in some places where alloc_bootmem was
> called. Since kmemleak now tracks bootmem allocations, these explicit
> calls should be removed.
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Pekka Enberg <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
> ---
> kernel/pid.c | 7 -------
> mm/page_alloc.c | 14 +++-----------
> 2 files changed, 3 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 5fa1db4..31310b5 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -36,7 +36,6 @@
> #include <linux/pid_namespace.h>
> #include <linux/init_task.h>
> #include <linux/syscalls.h>
> -#include <linux/kmemleak.h>
>
> #define pid_hashfn(nr, ns) \
> hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
> @@ -513,12 +512,6 @@ void __init pidhash_init(void)
> pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
> if (!pid_hash)
> panic("Could not alloc pidhash!\n");
> - /*
> - * pid_hash contains references to allocated struct pid objects and it
> - * must be scanned by kmemleak to avoid false positives.
> - */
> - kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
> - GFP_KERNEL);
> for (i = 0; i < pidhash_size; i++)
> INIT_HLIST_HEAD(&pid_hash[i]);
> }
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e0f2cdf..202ef6b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4745,8 +4745,10 @@ void *__init alloc_large_system_hash(const char *tablename,
> * some pages at the end of hash table which
> * alloc_pages_exact() automatically does
> */
> - if (get_order(size) < MAX_ORDER)
> + if (get_order(size) < MAX_ORDER) {
> table = alloc_pages_exact(size, GFP_ATOMIC);
> + kmemleak_alloc(table, size, 1, GFP_ATOMIC);
> + }
> }
> } while (!table && size > PAGE_SIZE && --log2qty);
>
> @@ -4764,16 +4766,6 @@ void *__init alloc_large_system_hash(const char *tablename,
> if (_hash_mask)
> *_hash_mask = (1 << log2qty) - 1;
>
> - /*
> - * If hashdist is set, the table allocation is done with __vmalloc()
> - * which invokes the kmemleak_alloc() callback. This function may also
> - * be called before the slab and kmemleak are initialised when
> - * kmemleak simply buffers the request to be executed later
> - * (GFP_ATOMIC flag ignored in this case).
> - */
> - if (!hashdist)
> - kmemleak_alloc(table, size, 1, GFP_ATOMIC);
> -
> return table;
> }
>
>
Pekka Enberg <[email protected]> wrote:
> On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
>> @@ -552,8 +558,29 @@ static void delete_object(unsigned long ptr)
>> */
>> spin_lock_irqsave(&object->lock, flags);
>> object->flags &= ~OBJECT_ALLOCATED;
>> + start = object->pointer;
>> + end = object->pointer + object->size;
>> + min_count = object->min_count;
>> spin_unlock_irqrestore(&object->lock, flags);
>> put_object(object);
>> +
>> + if (!size)
>> + return;
>> +
>> + /*
>> + * Partial freeing. Just create one or two objects that may result
>> + * from the memory block split.
>> + */
>> + if (in_atomic())
>> + gfp_flags = GFP_ATOMIC;
>> + else
>> + gfp_flags = GFP_KERNEL;
>
> Are you sure we can do this? There's a big fat comment on top of
> in_atomic() that suggests this is not safe.
It's not safe, but I thought it was slightly better than not checking at all.
> Why do we need to create the
> object here anyway and not in the _alloc_ paths where gfp flags are
> explicitly passed?
That's the free_bootmem case where Linux can only partially free a
block previously allocated with alloc_bootmem (that's why I haven't
tracked this from the beginning). So if it only frees some part in the
middle of a block, I would have to create two separate
kmemleak_objects (well, I can reuse one but I preferred fewer
modifications as this is not on a fast path anyway).
In the tests I did, free_bootmem is called before the slab allocator
is initialised and therefore before kmemleak is initialised, which
means that the requests are just logged and the kmemleak_* functions
are called later from the kmemleak_init() function. All allocations
via this function are fine to only use GFP_KERNEL.
If my reasoning above is correct, I'll only pass GFP_KERNEL and add a
comment in the code clarifying when the partial freeing happens.
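For reference, this is the flow I am relying on, condensed from the
patch above (not verbatim):

    /* before kmemleak_init(): the request is only buffered */
    void kmemleak_free_part(const void *ptr, size_t size)
    {
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
            delete_object((unsigned long)ptr, size);
        else if (atomic_read(&kmemleak_early_log))
            log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
    }

    /* later, replayed from kmemleak_init(), where GFP_KERNEL is fine */
    case KMEMLEAK_FREE_PART:
        kmemleak_free_part(log->ptr, log->size);
        break;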
Thanks.
--
Catalin
Catalin Marinas <[email protected]> wrote:
> Pekka Enberg <[email protected]> wrote:
>> On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
>>> @@ -552,8 +558,29 @@ static void delete_object(unsigned long ptr)
>>> */
>>> spin_lock_irqsave(&object->lock, flags);
>>> object->flags &= ~OBJECT_ALLOCATED;
>>> + start = object->pointer;
>>> + end = object->pointer + object->size;
>>> + min_count = object->min_count;
>>> spin_unlock_irqrestore(&object->lock, flags);
>>> put_object(object);
>>> +
>>> + if (!size)
>>> + return;
>>> +
>>> + /*
>>> + * Partial freeing. Just create one or two objects that may result
>>> + * from the memory block split.
>>> + */
>>> + if (in_atomic())
>>> + gfp_flags = GFP_ATOMIC;
>>> + else
>>> + gfp_flags = GFP_KERNEL;
>>
>> Are you sure we can do this? There's a big fat comment on top of
>> in_atomic() that suggests this is not safe.
[...]
> That's the free_bootmem case where Linux can only partially free a
> block previously allocated with alloc_bootmem (that's why I haven't
> tracked this from the beginning). So if it only frees some part in the
> middle of a block, I would have to create two separate
> kmemleak_objects (well, I can reuse one but I preferred fewer
> modifications as this is not on a fast path anyway).
>
> In the tests I did, free_bootmem is called before the slab allocator
> is initialised and therefore before kmemleak is initialised, which
> means that the requests are just logged and the kmemleak_* functions
> are called later from the kmemleak_init() function. All allocations
> via this function are fine to only use GFP_KERNEL.
Here's an updated patch:
kmemleak: Allow partial freeing of memory blocks
From: Catalin Marinas <[email protected]>
Functions like free_bootmem() are allowed to free only part of a memory
block. This patch adds support for this via the kmemleak_free_part()
callback which removes the original object and creates one or two
additional objects as a result of the memory block split.
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pekka Enberg <[email protected]>
---
include/linux/kmemleak.h | 4 ++++
mm/kmemleak.c | 52 ++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 7796aed..6a63807 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -27,6 +27,7 @@ extern void kmemleak_init(void);
extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
gfp_t gfp);
extern void kmemleak_free(const void *ptr);
+extern void kmemleak_free_part(const void *ptr, size_t size);
extern void kmemleak_padding(const void *ptr, unsigned long offset,
size_t size);
extern void kmemleak_not_leak(const void *ptr);
@@ -71,6 +72,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
static inline void kmemleak_free(const void *ptr)
{
}
+static inline void kmemleak_free_part(const void *ptr, size_t size)
+{
+}
static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
{
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5f7d8ae..8614255 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -210,6 +210,7 @@ static DEFINE_MUTEX(scan_mutex);
enum {
KMEMLEAK_ALLOC,
KMEMLEAK_FREE,
+ KMEMLEAK_FREE_PART,
KMEMLEAK_NOT_LEAK,
KMEMLEAK_IGNORE,
KMEMLEAK_SCAN_AREA,
@@ -522,15 +523,19 @@ out:
/*
* Remove the metadata (struct kmemleak_object) for a memory block from the
- * object_list and object_tree_root and decrement its use_count.
+ * object_list and object_tree_root and decrement its use_count. If the memory
+ * block is partially freed (size > 0), the function may create additional
+ * metadata for the remaining parts of the block.
*/
-static void delete_object(unsigned long ptr)
+static void delete_object(unsigned long ptr, size_t size)
{
unsigned long flags;
struct kmemleak_object *object;
+ unsigned long start, end;
+ unsigned int min_count;
write_lock_irqsave(&kmemleak_lock, flags);
- object = lookup_object(ptr, 0);
+ object = lookup_object(ptr, 1);
if (!object) {
#ifdef DEBUG
kmemleak_warn("Freeing unknown object at 0x%08lx\n",
@@ -552,8 +557,27 @@ static void delete_object(unsigned long ptr)
*/
spin_lock_irqsave(&object->lock, flags);
object->flags &= ~OBJECT_ALLOCATED;
+ start = object->pointer;
+ end = object->pointer + object->size;
+ min_count = object->min_count;
spin_unlock_irqrestore(&object->lock, flags);
put_object(object);
+
+ if (!size)
+ return;
+
+ /*
+ * Partial freeing. Just create one or two objects that may result
+ * from the memory block split. Note that partial freeing is only done
+ * by free_bootmem() and this happens before kmemleak_init() is
+ * called. The path below is only executed during early log recording
+ * in kmemleak_init(), so GFP_KERNEL is enough.
+ */
+ if (ptr > start)
+ create_object(start, ptr - start, min_count, GFP_KERNEL);
+ if (ptr + size < end)
+ create_object(ptr + size, end - ptr - size, min_count,
+ GFP_KERNEL);
}
/*
@@ -720,13 +744,28 @@ void kmemleak_free(const void *ptr)
pr_debug("%s(0x%p)\n", __func__, ptr);
if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
- delete_object((unsigned long)ptr);
+ delete_object((unsigned long)ptr, 0);
else if (atomic_read(&kmemleak_early_log))
log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free);
/*
+ * Partial memory freeing function callback. This function is usually called
+ * from bootmem allocator when (part of) a memory block is freed.
+ */
+void kmemleak_free_part(const void *ptr, size_t size)
+{
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+ if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+ delete_object((unsigned long)ptr, size);
+ else if (atomic_read(&kmemleak_early_log))
+ log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free_part);
+
+/*
* Mark an already allocated memory block as a false positive. This will cause
* the block to no longer be reported as leak and always be scanned.
*/
@@ -1345,7 +1384,7 @@ static int kmemleak_cleanup_thread(void *arg)
rcu_read_lock();
list_for_each_entry_rcu(object, &object_list, object_list)
- delete_object(object->pointer);
+ delete_object(object->pointer, 0);
rcu_read_unlock();
mutex_unlock(&scan_mutex);
@@ -1440,6 +1479,9 @@ void __init kmemleak_init(void)
case KMEMLEAK_FREE:
kmemleak_free(log->ptr);
break;
+ case KMEMLEAK_FREE_PART:
+ kmemleak_free_part(log->ptr, log->size);
+ break;
case KMEMLEAK_NOT_LEAK:
kmemleak_not_leak(log->ptr);
break;
--
Catalin
On Tue, Jul 07, 2009 at 10:08:50AM +0300, Pekka Enberg wrote:
> On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> > This patch adds kmemleak_alloc/free callbacks to the bootmem allocator.
> > This would allow scanning of such blocks and help avoid a whole class
> > of false positives and the need for more kmemleak annotations.
> >
> > Signed-off-by: Catalin Marinas <[email protected]>
> > Cc: Ingo Molnar <[email protected]>
> > Cc: Pekka Enberg <[email protected]>
>
> Looks good to me!
>
> Acked-by: Pekka Enberg <[email protected]>
>
> But let's cc Johannes on this too.
> > @@ -597,7 +601,9 @@ restart:
> > void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
> > unsigned long goal)
> > {
> > - return ___alloc_bootmem_nopanic(size, align, goal, 0);
> > + void *ptr = ___alloc_bootmem_nopanic(size, align, goal, 0);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > + return ptr;
You may get an object from kzalloc() here, I don't think you want to
track that (again), right?
Pekka already worked out all the central places to catch 'slab already
available' allocations, they can probably help you place the hooks.
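For example, the node variants bail out to slab early on (condensed
from mm/bootmem.c, not verbatim):

    if (WARN_ON_ONCE(slab_is_available()))
        return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

kzalloc_node() is already tracked through the slab hooks, so calling
kmemleak_alloc() on the returned pointer in the wrapper registers the
same block twice.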
> > static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
> > @@ -631,7 +637,9 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
> > void * __init __alloc_bootmem(unsigned long size, unsigned long align,
> > unsigned long goal)
> > {
> > - return ___alloc_bootmem(size, align, goal, 0);
> > + void *ptr = ___alloc_bootmem(size, align, goal, 0);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > + return ptr;
Same here.
> > #ifdef CONFIG_SPARSEMEM
> > @@ -707,14 +719,18 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
> > return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
> >
> > ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > if (ptr)
> > return ptr;
> >
> > ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > if (ptr)
> > return ptr;
> >
> > - return __alloc_bootmem_nopanic(size, align, goal);
> > + ptr = __alloc_bootmem_nopanic(size, align, goal);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > + return ptr;
> > }
Can you use a central exit and goto?
> > #ifndef ARCH_LOW_ADDRESS_LIMIT
> > @@ -737,7 +753,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
> > void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
> > unsigned long goal)
> > {
> > - return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
> > + void *ptr = ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > + return ptr;
> > }
Possible slab object.
> > @@ -758,9 +776,13 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
> > void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
> > unsigned long align, unsigned long goal)
> > {
> > + void *ptr;
> > +
> > if (WARN_ON_ONCE(slab_is_available()))
> > return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
> >
> > - return ___alloc_bootmem_node(pgdat->bdata, size, align,
> > - goal, ARCH_LOW_ADDRESS_LIMIT);
> > + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
> > + goal, ARCH_LOW_ADDRESS_LIMIT);
> > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
These GFP_KERNEL startled me. We know for sure that this code runs in
earlylog mode only and gfp is unused, right? Can you perhaps just
pass 0 for gfp instead?
Hannes
On Tue, 2009-07-07 at 18:53 +0200, Johannes Weiner wrote:
> On Tue, Jul 07, 2009 at 10:08:50AM +0300, Pekka Enberg wrote:
> > On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> > > @@ -597,7 +601,9 @@ restart:
> > > void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
> > > unsigned long goal)
> > > {
> > > - return ___alloc_bootmem_nopanic(size, align, goal, 0);
> > > + void *ptr = ___alloc_bootmem_nopanic(size, align, goal, 0);
> > > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > > + return ptr;
>
> You may get an object from kzalloc() here, I don't think you want to
> track that (again), right?
You are right, I missed the alloc_arch_preferred_bootmem() function
which may call kzalloc().
> Pekka already worked out all the central places to catch 'slab already
> available' allocations, they can probably help you place the hooks.
It seems that alloc_bootmem_core() is central to all the bootmem
allocations. Is it OK to place the kmemleak_alloc hook only in this
function?
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 5a649a0..74cbb34 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -520,6 +520,7 @@ find_block:
region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
start_off);
memset(region, 0, size);
+ kmemleak_alloc(region, size, 1, 0);
return region;
}
> > > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
>
> These GFP_KERNEL startled me. We know for sure that this code runs in
> earlylog mode only and gfp is unused, right? Can you perhaps just
> pass 0 for gfp instead?
Yes, indeed.
Thanks for your comments.
--
Catalin
On Tue, 2009-07-07 at 14:39 +0100, Catalin Marinas wrote:
> @@ -552,8 +557,27 @@ static void delete_object(unsigned long ptr)
> */
> spin_lock_irqsave(&object->lock, flags);
> object->flags &= ~OBJECT_ALLOCATED;
> + start = object->pointer;
> + end = object->pointer + object->size;
> + min_count = object->min_count;
> spin_unlock_irqrestore(&object->lock, flags);
> put_object(object);
> +
> + if (!size)
> + return;
> +
> + /*
> + * Partial freeing. Just create one or two objects that may result
> + * from the memory block split. Note that partial freeing is only done
> + * by free_bootmem() and this happens before kmemleak_init() is
> + * called. The path below is only executed during early log recording
> + * in kmemleak_init(), so GFP_KERNEL is enough.
> + */
> + if (ptr > start)
> + create_object(start, ptr - start, min_count, GFP_KERNEL);
> + if (ptr + size < end)
> + create_object(ptr + size, end - ptr - size, min_count,
> + GFP_KERNEL);
> }
Looks good to me. I think it would be better to have
delete_object_full() and delete_object_part(), and extract the common
code to __delete_object() or something instead of passing the magic zero
from kmemleak_free().
In any case:
Acked-by: Pekka Enberg <[email protected]>
Pekka
Hi Catalin,
On Tue, 2009-07-07 at 23:09 +0100, Catalin Marinas wrote:
> On Tue, 2009-07-07 at 18:53 +0200, Johannes Weiner wrote:
> > On Tue, Jul 07, 2009 at 10:08:50AM +0300, Pekka Enberg wrote:
> > > On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> > > > @@ -597,7 +601,9 @@ restart:
> > > > void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
> > > > unsigned long goal)
> > > > {
> > > > - return ___alloc_bootmem_nopanic(size, align, goal, 0);
> > > > + void *ptr = ___alloc_bootmem_nopanic(size, align, goal, 0);
> > > > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > > > + return ptr;
> >
> > You may get an object from kzalloc() here, I don't think you want to
> > track that (again), right?
>
> You are right, I missed the alloc_arch_preferred_bootmem() function
> which may call kzalloc().
>
> > Pekka already worked out all the central places to catch 'slab already
> > available' allocations, they can probably help you place the hooks.
>
> It seems that alloc_bootmem_core() is central to all the bootmem
> allocations. Is it OK to place the kmemleak_alloc hook only in this
> function?
I think so. Johannes?
>
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index 5a649a0..74cbb34 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -520,6 +520,7 @@ find_block:
> region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
> start_off);
> memset(region, 0, size);
> + kmemleak_alloc(region, size, 1, 0);
> return region;
> }
On Wed, 2009-07-08 at 09:40 +0300, Pekka Enberg wrote:
> Looks good to me. I think it would be better to have
> delete_object_full() and delete_object_part(), and extract the common
> code to __delete_object() or something instead of passing the magic zero
> from kmemleak_free().
Yes, it makes sense:
kmemleak: Allow partial freeing of memory blocks
From: Catalin Marinas <[email protected]>
Functions like free_bootmem() are allowed to free only part of a memory
block. This patch adds support for this via the kmemleak_free_part()
callback which removes the original object and creates one or two
additional objects as a result of the memory block split.
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pekka Enberg <[email protected]>
---
include/linux/kmemleak.h | 4 ++
mm/kmemleak.c | 95 +++++++++++++++++++++++++++++++++++++++-------
2 files changed, 85 insertions(+), 14 deletions(-)
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 7796aed..6a63807 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -27,6 +27,7 @@ extern void kmemleak_init(void);
extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
gfp_t gfp);
extern void kmemleak_free(const void *ptr);
+extern void kmemleak_free_part(const void *ptr, size_t size);
extern void kmemleak_padding(const void *ptr, unsigned long offset,
size_t size);
extern void kmemleak_not_leak(const void *ptr);
@@ -71,6 +72,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
static inline void kmemleak_free(const void *ptr)
{
}
+static inline void kmemleak_free_part(const void *ptr, size_t size)
+{
+}
static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
{
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5f7d8ae..7b4647d 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -210,6 +210,7 @@ static DEFINE_MUTEX(scan_mutex);
enum {
KMEMLEAK_ALLOC,
KMEMLEAK_FREE,
+ KMEMLEAK_FREE_PART,
KMEMLEAK_NOT_LEAK,
KMEMLEAK_IGNORE,
KMEMLEAK_SCAN_AREA,
@@ -524,27 +525,17 @@ out:
* Remove the metadata (struct kmemleak_object) for a memory block from the
* object_list and object_tree_root and decrement its use_count.
*/
-static void delete_object(unsigned long ptr)
+static void __delete_object(struct kmemleak_object *object)
{
unsigned long flags;
- struct kmemleak_object *object;
write_lock_irqsave(&kmemleak_lock, flags);
- object = lookup_object(ptr, 0);
- if (!object) {
-#ifdef DEBUG
- kmemleak_warn("Freeing unknown object at 0x%08lx\n",
- ptr);
-#endif
- write_unlock_irqrestore(&kmemleak_lock, flags);
- return;
- }
prio_tree_remove(&object_tree_root, &object->tree_node);
list_del_rcu(&object->object_list);
write_unlock_irqrestore(&kmemleak_lock, flags);
WARN_ON(!(object->flags & OBJECT_ALLOCATED));
- WARN_ON(atomic_read(&object->use_count) < 1);
+ WARN_ON(atomic_read(&object->use_count) < 2);
/*
* Locking here also ensures that the corresponding memory block
@@ -557,6 +548,64 @@ static void delete_object(unsigned long ptr)
}
/*
+ * Look up the metadata (struct kmemleak_object) corresponding to ptr and
+ * delete it.
+ */
+static void delete_object_full(unsigned long ptr)
+{
+ struct kmemleak_object *object;
+
+ object = find_and_get_object(ptr, 0);
+ if (!object) {
+#ifdef DEBUG
+ kmemleak_warn("Freeing unknown object at 0x%08lx\n",
+ ptr);
+#endif
+ return;
+ }
+ __delete_object(object);
+ put_object(object);
+}
+
+/*
+ * Look up the metadata (struct kmemleak_object) corresponding to ptr and
+ * delete it. If the memory block is partially freed, the function may create
+ * additional metadata for the remaining parts of the block.
+ */
+static void delete_object_part(unsigned long ptr, size_t size)
+{
+ struct kmemleak_object *object;
+ unsigned long start, end;
+
+ object = find_and_get_object(ptr, 1);
+ if (!object) {
+#ifdef DEBUG
+ kmemleak_warn("Partially freeing unknown object at 0x%08lx "
+ "(size %zu)\n", ptr, size);
+#endif
+ return;
+ }
+ __delete_object(object);
+
+ /*
+ * Create one or two objects that may result from the memory block
+ * split. Note that partial freeing is only done by free_bootmem() and
+ * this happens before kmemleak_init() is called. The path below is
+ * only executed during early log recording in kmemleak_init(), so
+ * GFP_KERNEL is enough.
+ */
+ start = object->pointer;
+ end = object->pointer + object->size;
+ if (ptr > start)
+ create_object(start, ptr - start, object->min_count,
+ GFP_KERNEL);
+ if (ptr + size < end)
+ create_object(ptr + size, end - ptr - size, object->min_count,
+ GFP_KERNEL);
+
+ put_object(object);
+}
+/*
* Make a object permanently as gray-colored so that it can no longer be
* reported as a leak. This is used in general to mark a false positive.
*/
@@ -720,13 +769,28 @@ void kmemleak_free(const void *ptr)
pr_debug("%s(0x%p)\n", __func__, ptr);
if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
- delete_object((unsigned long)ptr);
+ delete_object_full((unsigned long)ptr);
else if (atomic_read(&kmemleak_early_log))
log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free);
/*
+ * Partial memory freeing function callback. This function is usually called
+ * from bootmem allocator when (part of) a memory block is freed.
+ */
+void kmemleak_free_part(const void *ptr, size_t size)
+{
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+ if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+ delete_object_part((unsigned long)ptr, size);
+ else if (atomic_read(&kmemleak_early_log))
+ log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free_part);
+
+/*
* Mark an already allocated memory block as a false positive. This will cause
* the block to no longer be reported as leak and always be scanned.
*/
@@ -1345,7 +1409,7 @@ static int kmemleak_cleanup_thread(void *arg)
rcu_read_lock();
list_for_each_entry_rcu(object, &object_list, object_list)
- delete_object(object->pointer);
+ delete_object_full(object->pointer);
rcu_read_unlock();
mutex_unlock(&scan_mutex);
@@ -1440,6 +1504,9 @@ void __init kmemleak_init(void)
case KMEMLEAK_FREE:
kmemleak_free(log->ptr);
break;
+ case KMEMLEAK_FREE_PART:
+ kmemleak_free_part(log->ptr, log->size);
+ break;
case KMEMLEAK_NOT_LEAK:
kmemleak_not_leak(log->ptr);
break;
--
Catalin
On Wed, 2009-07-08 at 09:48 +0300, Pekka Enberg wrote:
> On Tue, 2009-07-07 at 23:09 +0100, Catalin Marinas wrote:
> > On Tue, 2009-07-07 at 18:53 +0200, Johannes Weiner wrote:
> > > On Tue, Jul 07, 2009 at 10:08:50AM +0300, Pekka Enberg wrote:
> > > > On Mon, 2009-07-06 at 11:51 +0100, Catalin Marinas wrote:
> > > > > @@ -597,7 +601,9 @@ restart:
> > > > > void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
> > > > > unsigned long goal)
> > > > > {
> > > > > - return ___alloc_bootmem_nopanic(size, align, goal, 0);
> > > > > + void *ptr = ___alloc_bootmem_nopanic(size, align, goal, 0);
> > > > > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> > > > > + return ptr;
> > >
> > > You may get an object from kzalloc() here, I don't think you want to
> > > track that (again), right?
> >
> > You are right, I missed the alloc_arch_preferred_bootmem() function
> > which may call kzalloc().
> >
> > > Pekka already worked out all the central places to catch 'slab already
> > > available' allocations, they can probably help you place the hooks.
> >
> > It seems that alloc_bootmem_core() is central to all the bootmem
> > allocations. Is it OK to place the kmemleak_alloc hook only in this
> > function?
>
> I think so. Johannes?
To get a better view, here's the complete patch:
kmemleak: Add callbacks to the bootmem allocator
From: Catalin Marinas <[email protected]>
This patch adds kmemleak_alloc/free callbacks to the bootmem allocator.
This would allow scanning of such blocks and help avoid a whole class
of false positives and the need for more kmemleak annotations.
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pekka Enberg <[email protected]>
---
mm/bootmem.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d2a9ce9..90f3ed0 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -335,6 +335,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
{
unsigned long start, end;
+ kmemleak_free_part(__va(physaddr), size);
+
start = PFN_UP(physaddr);
end = PFN_DOWN(physaddr + size);
@@ -354,6 +356,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
{
unsigned long start, end;
+ kmemleak_free_part(__va(addr), size);
+
start = PFN_UP(addr);
end = PFN_DOWN(addr + size);
@@ -516,6 +520,7 @@ find_block:
region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
start_off);
memset(region, 0, size);
+ kmemleak_alloc(region, size, 1, 0);
return region;
}
--
Catalin
On Wed, 2009-07-08 at 10:42 +0100, Catalin Marinas wrote:
> kmemleak: Allow partial freeing of memory blocks
>
> From: Catalin Marinas <[email protected]>
>
> Functions like free_bootmem() are allowed to free only part of a memory
> block. This patch adds support for this via the kmemleak_free_part()
> callback which removes the original object and creates one or two
> additional objects as a result of the memory block split.
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Pekka Enberg <[email protected]>
Looks good to me!
Acked-by: Pekka Enberg <[email protected]>
> ---
> include/linux/kmemleak.h | 4 ++
> mm/kmemleak.c | 95 +++++++++++++++++++++++++++++++++++++++-------
> 2 files changed, 85 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
> index 7796aed..6a63807 100644
> --- a/include/linux/kmemleak.h
> +++ b/include/linux/kmemleak.h
> @@ -27,6 +27,7 @@ extern void kmemleak_init(void);
> extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
> gfp_t gfp);
> extern void kmemleak_free(const void *ptr);
> +extern void kmemleak_free_part(const void *ptr, size_t size);
> extern void kmemleak_padding(const void *ptr, unsigned long offset,
> size_t size);
> extern void kmemleak_not_leak(const void *ptr);
> @@ -71,6 +72,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
> static inline void kmemleak_free(const void *ptr)
> {
> }
> +static inline void kmemleak_free_part(const void *ptr, size_t size)
> +{
> +}
> static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
> {
> }
> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
> index 5f7d8ae..7b4647d 100644
> --- a/mm/kmemleak.c
> +++ b/mm/kmemleak.c
> @@ -210,6 +210,7 @@ static DEFINE_MUTEX(scan_mutex);
> enum {
> KMEMLEAK_ALLOC,
> KMEMLEAK_FREE,
> + KMEMLEAK_FREE_PART,
> KMEMLEAK_NOT_LEAK,
> KMEMLEAK_IGNORE,
> KMEMLEAK_SCAN_AREA,
> @@ -524,27 +525,17 @@ out:
> * Remove the metadata (struct kmemleak_object) for a memory block from the
> * object_list and object_tree_root and decrement its use_count.
> */
> -static void delete_object(unsigned long ptr)
> +static void __delete_object(struct kmemleak_object *object)
> {
> unsigned long flags;
> - struct kmemleak_object *object;
>
> write_lock_irqsave(&kmemleak_lock, flags);
> - object = lookup_object(ptr, 0);
> - if (!object) {
> -#ifdef DEBUG
> - kmemleak_warn("Freeing unknown object at 0x%08lx\n",
> - ptr);
> -#endif
> - write_unlock_irqrestore(&kmemleak_lock, flags);
> - return;
> - }
> prio_tree_remove(&object_tree_root, &object->tree_node);
> list_del_rcu(&object->object_list);
> write_unlock_irqrestore(&kmemleak_lock, flags);
>
> WARN_ON(!(object->flags & OBJECT_ALLOCATED));
> - WARN_ON(atomic_read(&object->use_count) < 1);
> + WARN_ON(atomic_read(&object->use_count) < 2);
>
> /*
> * Locking here also ensures that the corresponding memory block
> @@ -557,6 +548,64 @@ static void delete_object(unsigned long ptr)
> }
>
> /*
> + * Look up the metadata (struct kmemleak_object) corresponding to ptr and
> + * delete it.
> + */
> +static void delete_object_full(unsigned long ptr)
> +{
> + struct kmemleak_object *object;
> +
> + object = find_and_get_object(ptr, 0);
> + if (!object) {
> +#ifdef DEBUG
> + kmemleak_warn("Freeing unknown object at 0x%08lx\n",
> + ptr);
> +#endif
> + return;
> + }
> + __delete_object(object);
> + put_object(object);
> +}
> +
> +/*
> + * Look up the metadata (struct kmemleak_object) corresponding to ptr and
> + * delete it. If the memory block is partially freed, the function may create
> + * additional metadata for the remaining parts of the block.
> + */
> +static void delete_object_part(unsigned long ptr, size_t size)
> +{
> + struct kmemleak_object *object;
> + unsigned long start, end;
> +
> + object = find_and_get_object(ptr, 1);
> + if (!object) {
> +#ifdef DEBUG
> + kmemleak_warn("Partially freeing unknown object at 0x%08lx "
> + "(size %zu)\n", ptr, size);
> +#endif
> + return;
> + }
> + __delete_object(object);
> +
> + /*
> + * Create one or two objects that may result from the memory block
> + * split. Note that partial freeing is only done by free_bootmem() and
> + * this happens before kmemleak_init() is called. The path below is
> + * only executed during early log recording in kmemleak_init(), so
> + * GFP_KERNEL is enough.
> + */
> + start = object->pointer;
> + end = object->pointer + object->size;
> + if (ptr > start)
> + create_object(start, ptr - start, object->min_count,
> + GFP_KERNEL);
> + if (ptr + size < end)
> + create_object(ptr + size, end - ptr - size, object->min_count,
> + GFP_KERNEL);
> +
> + put_object(object);
> +}
> +/*
> * Make a object permanently as gray-colored so that it can no longer be
> * reported as a leak. This is used in general to mark a false positive.
> */
> @@ -720,13 +769,28 @@ void kmemleak_free(const void *ptr)
> pr_debug("%s(0x%p)\n", __func__, ptr);
>
> if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
> - delete_object((unsigned long)ptr);
> + delete_object_full((unsigned long)ptr);
> else if (atomic_read(&kmemleak_early_log))
> log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
> }
> EXPORT_SYMBOL_GPL(kmemleak_free);
>
> /*
> + * Partial memory freeing function callback. This function is usually called
> + * from bootmem allocator when (part of) a memory block is freed.
> + */
> +void kmemleak_free_part(const void *ptr, size_t size)
> +{
> + pr_debug("%s(0x%p)\n", __func__, ptr);
> +
> + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
> + delete_object_part((unsigned long)ptr, size);
> + else if (atomic_read(&kmemleak_early_log))
> + log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
> +}
> +EXPORT_SYMBOL_GPL(kmemleak_free_part);
> +
> +/*
> * Mark an already allocated memory block as a false positive. This will cause
> * the block to no longer be reported as leak and always be scanned.
> */
> @@ -1345,7 +1409,7 @@ static int kmemleak_cleanup_thread(void *arg)
>
> rcu_read_lock();
> list_for_each_entry_rcu(object, &object_list, object_list)
> - delete_object(object->pointer);
> + delete_object_full(object->pointer);
> rcu_read_unlock();
> mutex_unlock(&scan_mutex);
>
> @@ -1440,6 +1504,9 @@ void __init kmemleak_init(void)
> case KMEMLEAK_FREE:
> kmemleak_free(log->ptr);
> break;
> + case KMEMLEAK_FREE_PART:
> + kmemleak_free_part(log->ptr, log->size);
> + break;
> case KMEMLEAK_NOT_LEAK:
> kmemleak_not_leak(log->ptr);
> break;
>
>
On Tue, Jul 07, 2009 at 11:09:46PM +0100, Catalin Marinas wrote:
> It seems that alloc_bootmem_core() is central to all the bootmem
> allocations. Is it OK to place the kmemleak_alloc hook only in this
> function?
>
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index 5a649a0..74cbb34 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -520,6 +520,7 @@ find_block:
> region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
> start_off);
> memset(region, 0, size);
> + kmemleak_alloc(region, size, 1, 0);
> return region;
> }
Yes, that should work.
> > > > + kmemleak_alloc(ptr, size, 1, GFP_KERNEL);
> >
> > These GFP_KERNEL startled me. We know for sure that this code runs in
> > earlylog mode only and gfp is unused, right? Can you perhaps just
> > pass 0 for gfp instead?
>
> Yes, indeed.
Thank you.
Hannes
On Wed, 2009-07-08 at 11:46 +0200, Johannes Weiner wrote:
> On Tue, Jul 07, 2009 at 11:09:46PM +0100, Catalin Marinas wrote:
>
> > It seems that alloc_bootmem_core() is central to all the bootmem
> > allocations. Is it OK to place the kmemleak_alloc hook only in this
> > function?
> >
> > diff --git a/mm/bootmem.c b/mm/bootmem.c
> > index 5a649a0..74cbb34 100644
> > --- a/mm/bootmem.c
> > +++ b/mm/bootmem.c
> > @@ -520,6 +520,7 @@ find_block:
> > region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
> > start_off);
> > memset(region, 0, size);
> > + kmemleak_alloc(region, size, 1, 0);
> > return region;
> > }
>
> Yes, that should work.
Thanks. May I add your Acked-by line to the final patch?
--
Catalin
On Wed, 2009-07-08 at 11:02 +0100, Catalin Marinas wrote:
> On Wed, 2009-07-08 at 11:46 +0200, Johannes Weiner wrote:
> > On Tue, Jul 07, 2009 at 11:09:46PM +0100, Catalin Marinas wrote:
> >
> > > It seems that alloc_bootmem_core() is central to all the bootmem
> > > allocations. Is it OK to place the kmemleak_alloc hook only in this
> > > function?
> > >
> > > diff --git a/mm/bootmem.c b/mm/bootmem.c
> > > index 5a649a0..74cbb34 100644
> > > --- a/mm/bootmem.c
> > > +++ b/mm/bootmem.c
> > > @@ -520,6 +520,7 @@ find_block:
> > > region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
> > > start_off);
> > > memset(region, 0, size);
> > > + kmemleak_alloc(region, size, 1, 0);
> > > return region;
> > > }
> >
> > Yes, that should work.
>
> Thanks. May I add your Acked-by line to the final patch?
I guess you mean Johannes, but while we're at it:
Acked-by: Pekka Enberg <[email protected]>
On Wed, Jul 08, 2009 at 10:43:51AM +0100, Catalin Marinas wrote:
> kmemleak: Add callbacks to the bootmem allocator
>
> From: Catalin Marinas <[email protected]>
>
> This patch adds kmemleak_alloc/free callbacks to the bootmem allocator.
> This would allow scanning of such blocks and help avoid a whole class
> of false positives and the need for more kmemleak annotations.
>
> Signed-off-by: Catalin Marinas <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Pekka Enberg <[email protected]>
Reviewed-by: Johannes Weiner <[email protected]>
> ---
> mm/bootmem.c | 5 +++++
> 1 files changed, 5 insertions(+), 0 deletions(-)
>
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index d2a9ce9..90f3ed0 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -335,6 +335,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
> {
> unsigned long start, end;
>
> + kmemleak_free_part(__va(physaddr), size);
> +
> start = PFN_UP(physaddr);
> end = PFN_DOWN(physaddr + size);
>
> @@ -354,6 +356,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
> {
> unsigned long start, end;
>
> + kmemleak_free_part(__va(addr), size);
> +
> start = PFN_UP(addr);
> end = PFN_DOWN(addr + size);
>
> @@ -516,6 +520,7 @@ find_block:
> region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
> start_off);
> memset(region, 0, size);
> + kmemleak_alloc(region, size, 1, 0);
> return region;
> }