From: Zhaoyang Huang <[email protected]>
The page with special page type will be deemed as bad page wrongly since
type share the same address with mapcount.
Signed-off-by: Zhaoyang Huang <[email protected]>
---
include/linux/page-flags.h | 4 ++--
mm/page_alloc.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e66f7aa..5d3274b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -946,9 +946,9 @@ static inline bool is_page_hwpoison(struct page *page)
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
-static inline int page_has_type(struct page *page)
+static inline bool page_has_type(struct page *page)
{
- return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
+ return page->page_type < PAGE_MAPCOUNT_RESERVE;
}
#define PAGE_TYPE_OPS(uname, lname) \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e008a3d..3714680 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1164,7 +1164,7 @@ int split_free_page(struct page *free_page,
static inline bool page_expected_state(struct page *page,
unsigned long check_flags)
{
- if (unlikely(atomic_read(&page->_mapcount) != -1))
+ if (unlikely(!page_has_type(page) && atomic_read(&page->_mapcount) != -1))
return false;
if (unlikely((unsigned long)page->mapping |
--
1.9.1
From: Zhaoyang Huang <[email protected]>
Kthread and drivers could fetch memory via alloc_pages directly which make them
hard to debug when leaking. Solve this by introducing __GFP_TRACELEAK and reuse
kmemleak mechanism which unified most of kernel cosuming pages into kmemleak.
This patch has been tested with alloc_pages(__GFP_TRACKLEAK) & (__GFP_TRACKLEAK|__GFP_COMP)
and got proved as effective.
unreferenced object 0xffffff807c620000 (size 65536):
comm "[email protected]", pid 745, jiffies 4294906308 (age 5136.616s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<00000000ffefbfdf>] __alloc_pages_nodemask+0x108/0x3a4
[<0000000083595277>] ion_page_pool_alloc+0x178/0x234
[<000000008267995a>] ion_system_heap_allocate+0x13c/0x708
[<00000000d4df5a5e>] ion_buffer_create+0x98/0x67c
[<0000000043fa6683>] ion_dmabuf_alloc+0xcc/0x1c0
[<000000000d1db17e>] ion_ioctl+0x150/0x350
[<00000000a2b89048>] do_vfs_ioctl+0x5d4/0xa94
[<000000008e9b61d3>] __arm64_sys_ioctl+0x14c/0x164
[<00000000114425a9>] el0_svc_common+0xd0/0x23c
[<00000000ec9cb1b1>] el0_svc_handler+0x2c/0x3c
[<00000000e44a2c21>] el0_svc+0x8/0x100
unreferenced object 0xffffff807c189000 (size 4096):
comm "[email protected]", pid 745, jiffies 4294906309 (age 5136.612s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<00000000ffefbfdf>] __alloc_pages_nodemask+0x108/0x3a4
[<0000000083595277>] ion_page_pool_alloc+0x178/0x234
[<00000000b30c4562>] ion_system_heap_allocate+0x160/0x708
[<00000000d4df5a5e>] ion_buffer_create+0x98/0x67c
[<0000000043fa6683>] ion_dmabuf_alloc+0xcc/0x1c0
[<000000000d1db17e>] ion_ioctl+0x150/0x350
[<00000000a2b89048>] do_vfs_ioctl+0x5d4/0xa94
[<000000008e9b61d3>] __arm64_sys_ioctl+0x14c/0x164
[<00000000114425a9>] el0_svc_common+0xd0/0x23c
[<00000000ec9cb1b1>] el0_svc_handler+0x2c/0x3c
[<00000000e44a2c21>] el0_svc+0x8/0x100
Signed-off-by: Zhaoyang Huang <[email protected]>
---
v2: code update
v3: update code and Documentation
---
---
Documentation/dev-tools/kmemleak.rst | 5 ++++-
include/linux/gfp.h | 8 +++++++-
include/linux/page-flags.h | 3 +++
mm/page_alloc.c | 14 ++++++++++++++
4 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index 1c935f4..b1128fe 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -81,7 +81,7 @@ Basic Algorithm
---------------
The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
-:c:func:`kmem_cache_alloc` and
+:c:func:`kmem_cache_alloc`, :c:func:`alloc_pages(__GFP_TRACKLEAK)` (1)and
friends are traced and the pointers, together with additional
information like size and stack trace, are stored in a rbtree.
The corresponding freeing function calls are tracked and the pointers
@@ -257,3 +257,6 @@ memory leaks``. Then read the file to see then::
Removing the module with ``rmmod kmemleak_test`` should also trigger some
kmemleak results.
+
+(1)Don't use __GFP_TRACKLEAK when getting pages for vm_iomap_memory which map
+physical address from kernel to userspace.
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2d2ccae..53464c6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -68,6 +68,11 @@
#else
#define ___GFP_NOLOCKDEP 0
#endif
+#ifdef CONFIG_HAVE_DEBUG_KMEMLEAK
+#define ___GFP_TRACKLEAK 0x10000000u
+#else
+#define ___GFP_TRACKLEAK 0
+#endif
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
/*
@@ -259,12 +264,13 @@
#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON)
#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
+#define __GFP_TRACKLEAK ((__force gfp_t)___GFP_TRACKLEAK)
/* Disable lockdep for GFP context tracking */
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
/* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP) + IS_ENABLED(CONFIG_HAVE_DEBUG_KMEMLEAK))
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/**
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5d3274b..1374e29 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -942,6 +942,7 @@ static inline bool is_page_hwpoison(struct page *page)
#define PG_offline 0x00000100
#define PG_table 0x00000200
#define PG_guard 0x00000400
+#define PG_trackleak 0x00000800
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -1012,6 +1013,8 @@ static inline bool page_has_type(struct page *page)
*/
PAGE_TYPE_OPS(Guard, guard)
+PAGE_TYPE_OPS(Trackleak, trackleak)
+
extern bool is_free_buddy_page(struct page *page);
PAGEFLAG(Isolated, isolated, PF_ANY);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3714680..9e036f1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1357,6 +1357,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
(page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
}
}
+ if (PageTrackleak(page)) {
+ __ClearPageTrackleak(page);
+ kmemleak_free(page_address(page));
+ }
if (PageMappingFlags(page))
page->mapping = NULL;
if (memcg_kmem_enabled() && PageMemcgKmem(page))
@@ -1521,6 +1525,11 @@ static void free_pcppages_bulk(struct zone *zone, int count,
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
+ if (PageTrackleak(page)) {
+ __ClearPageTrackleak(page);
+ kmemleak_free(page_address(page));
+ }
+
__free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
trace_mm_page_pcpu_drain(page, order, mt);
} while (count > 0 && !list_empty(list));
@@ -2468,6 +2477,11 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
set_page_pfmemalloc(page);
else
clear_page_pfmemalloc(page);
+
+ if (gfp_flags & __GFP_TRACKLEAK) {
+ kmemleak_alloc(page_address(page), PAGE_SIZE << order, 1, gfp_flags & ~__GFP_TRACKLEAK);
+ __SetPageTrackleak(page);
+ }
}
/*
--
1.9.1
On Wed, Sep 14, 2022 at 11:37 AM zhaoyang.huang
<[email protected]> wrote:
>
> From: Zhaoyang Huang <[email protected]>
>
> The page with special page type will be deemed as bad page wrongly since
> type share the same address with mapcount.
>
> Signed-off-by: Zhaoyang Huang <[email protected]>
> ---
> include/linux/page-flags.h | 4 ++--
> mm/page_alloc.c | 2 +-
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index e66f7aa..5d3274b 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -946,9 +946,9 @@ static inline bool is_page_hwpoison(struct page *page)
> #define PageType(page, flag) \
> ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
>
> -static inline int page_has_type(struct page *page)
> +static inline bool page_has_type(struct page *page)
> {
> - return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
> + return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
amend the type conversion
> }
>
> #define PAGE_TYPE_OPS(uname, lname) \
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e008a3d..3714680 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1164,7 +1164,7 @@ int split_free_page(struct page *free_page,
> static inline bool page_expected_state(struct page *page,
> unsigned long check_flags)
> {
> - if (unlikely(atomic_read(&page->_mapcount) != -1))
> + if (unlikely(!page_has_type(page) && atomic_read(&page->_mapcount) != -1))
> return false;
>
> if (unlikely((unsigned long)page->mapping |
> --
> 1.9.1
>
On Wed, Sep 14, 2022 at 11:37:00AM +0800, zhaoyang.huang wrote:
> From: Zhaoyang Huang <[email protected]>
>
> The page with special page type will be deemed as bad page wrongly since
> type share the same address with mapcount.
That's not wrongly. You didn't clear the bit. I told you you would
need to do that in the first version of the patch you sent.
On Wed, Sep 14, 2022 at 3:35 PM Matthew Wilcox <[email protected]> wrote:
>
> On Wed, Sep 14, 2022 at 11:37:00AM +0800, zhaoyang.huang wrote:
> > From: Zhaoyang Huang <[email protected]>
> >
> > The page with special page type will be deemed as bad page wrongly since
> > type share the same address with mapcount.
>
> That's not wrongly. You didn't clear the bit. I told you you would
> need to do that in the first version of the patch you sent.
Yes, I have cleared the PG_trackleak since v2 as you suggested.
However, IMHO, the page which has page type should be skipped for
mapcount check. The present problem is there is no invoking of
free_pages_prepare within the chain of
drain_pages_zone->free_pcppages_bulk, special pages with
page->type(PG_guard and PG_trackleak etc) will be leaked as
bulkfree_pcp_prepare will deem it as bad by checking the mapcount.
On Wed, Sep 14, 2022 at 11:37:01AM +0800, zhaoyang.huang wrote:
> ---
> v2: code update
> v3: update code and Documentation
This is really not good enough. What changed?
The documentation is also not good enough. It needs to mention:
- This cannot be used for GFP_HIGHMEM allocations.
- This cannot be used for pages which are mapped into userspace.
I also still want to see selftests. order-0, order-N (with and without
__GFP_COMP). What happens if you allocate an order-N page without
GFP_COMP, take an extra ref on the first page, call free_pages() and
then one of the recently-freed pages is allocated again while you still
have the reference on the first page?
I believe Andrew also suggested that
if (PageTrackleak(page))
become always-false if the CONFIG option is disabled.
> +#ifdef CONFIG_HAVE_DEBUG_KMEMLEAK
This is the wrong CONFIG option, it should be CONFIG_DEBUG_KMEMLEAK.
Add to this the very real question of how useful is this, and I'm not
getting warm fuzzy feelings about where this patchset is heading.
Nack here and will be updated with trackleak if necessary.
On Wed, Sep 14, 2022 at 11:37 AM zhaoyang.huang
<[email protected]> wrote:
>
> From: Zhaoyang Huang <[email protected]>
>
> The page with special page type will be deemed as bad page wrongly since
> type share the same address with mapcount.
>
> Signed-off-by: Zhaoyang Huang <[email protected]>
> ---
> include/linux/page-flags.h | 4 ++--
> mm/page_alloc.c | 2 +-
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index e66f7aa..5d3274b 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -946,9 +946,9 @@ static inline bool is_page_hwpoison(struct page *page)
> #define PageType(page, flag) \
> ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
>
> -static inline int page_has_type(struct page *page)
> +static inline bool page_has_type(struct page *page)
> {
> - return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
> + return page->page_type < PAGE_MAPCOUNT_RESERVE;
> }
>
> #define PAGE_TYPE_OPS(uname, lname) \
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e008a3d..3714680 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1164,7 +1164,7 @@ int split_free_page(struct page *free_page,
> static inline bool page_expected_state(struct page *page,
> unsigned long check_flags)
> {
> - if (unlikely(atomic_read(&page->_mapcount) != -1))
> + if (unlikely(!page_has_type(page) && atomic_read(&page->_mapcount) != -1))
> return false;
>
> if (unlikely((unsigned long)page->mapping |
> --
> 1.9.1
>