Changelog:
v3:
- Sync with the latest linux-next
v2:
- As suggested by Matthew Wilcox removed "mm: page_ref_add_unless()
does not trace 'u' argument" patch as page_ref_add_unless is going
away.
v1:
- Sync with the latest linux-next
RFCv2:
- use the "fetch" variant instead of "return" of atomic instructions
- allow negative values, as we are using all 32-bits of _refcount.
It is hard to root cause _refcount problems, because they usually
manifest after the damage has occurred. Yet, they can lead to
catastrophic failures such as memory corruptions. There were a number
of refcount related issues discovered recently [1], [2], [3].
Improve debuggability by adding more checks that ensure that
page->_refcount never turns negative (i.e. double free does not
happen, or free after freeze etc).
- Check for overflow and underflow right from the functions that
modify _refcount (a sketch of the check pattern follows this list)
- Remove set_page_count(), so we do not unconditionally overwrite
_refcount with an unrestrained value
- Trace return values in all functions that modify _refcount
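For reference, the check pattern that the first patch applies to the
_refcount modifying functions looks roughly like this; this is a
sketch of the idea using page_ref_add(), not the exact hunk:

	static inline void page_ref_add(struct page *page, int nr)
	{
		int old_val = atomic_fetch_add(nr, &page->_refcount);
		int new_val = old_val + nr;

		/* Since all 32 bits of _refcount are in use, a wrap shows
		 * up as the unsigned value moving in the wrong direction. */
		VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
		if (page_ref_tracepoint_active(page_ref_mod))
			__page_ref_mod(page, nr);
	}

The "fetch" variant returns the value before the modification, so both
the old and the new counter values are available for the check.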
Applies against next-20220125.
Previous versions:
v2: https://lore.kernel.org/all/[email protected]
v1: https://lore.kernel.org/all/[email protected]
RFCv2: https://lore.kernel.org/all/[email protected]
RFCv1: https://lore.kernel.org/all/[email protected]
[1] https://lore.kernel.org/all/[email protected]
[2] https://lore.kernel.org/all/[email protected]
[3] https://lore.kernel.org/all/[email protected]
Pasha Tatashin (9):
mm: add overflow and underflow checks for page->_refcount
mm: Avoid using set_page_count() in set_page_refcounted()
mm: remove set_page_count() from page_frag_alloc_align
mm: avoid using set_page_count() when pages are freed into allocator
mm: rename init_page_count() -> page_ref_init()
mm: remove set_page_count()
mm: simplify page_ref_* functions
mm: do not use atomic_set_release in page_ref_unfreeze()
mm: use atomic_cmpxchg_acquire in page_ref_freeze()
arch/m68k/mm/motorola.c | 2 +-
include/linux/mm.h | 2 +-
include/linux/page_ref.h | 149 +++++++++++++++-----------------
include/trace/events/page_ref.h | 58 ++++++++-----
mm/debug_page_ref.c | 22 +----
mm/internal.h | 6 +-
mm/page_alloc.c | 19 ++--
7 files changed, 132 insertions(+), 126 deletions(-)
--
2.35.0.rc0.227.g00780c9af4-goog
set_page_refcounted() converts a non-refcounted page that has
(page->_refcount == 0) into a refcounted page by setting _refcount to
1.
The current approach uses the following logic:
VM_BUG_ON_PAGE(page_ref_count(page), page);
set_page_count(page, 1);
However, if _refcount changes from 0 to 1 between the VM_BUG_ON_PAGE()
and set_page_count(), we can break _refcount, which can cause other
problems such as memory corruptions.
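For illustration, the losing interleaving looks like this (the CPU
labels are hypothetical; CPU B stands for any buggy or racing code):

	CPU A: set_page_refcounted()            CPU B
	VM_BUG_ON_PAGE(page_ref_count(page), page);
	  /* reads 0, check passes */
	                                        <changes _refcount,
	                                         e.g. 0 -> 1>
	set_page_count(page, 1);
	  /* blindly stores 1, wiping out CPU B's update and any
	   * evidence that something went wrong */

With the increment-then-check order used below, the same interleaving
makes page_ref_inc_return() return 2, so the VM_BUG_ON_PAGE() fires
instead of the corruption being papered over.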
Instead, use a safer method: increment _refcount first, and verify
that the value returned by the increment is indeed 1 (i.e. the counter
was 0 just before the increment):
refcnt = page_ref_inc_return(page);
VM_BUG_ON_PAGE(refcnt != 1, page);
Use page_ref_inc_return() to avoid unconditionally overwriting
the _refcount value with set_page_count(), and check the return value.
Signed-off-by: Pasha Tatashin <[email protected]>
---
mm/internal.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 4c2d06a2f50b..6b74f7f32613 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -141,9 +141,11 @@ static inline bool page_evictable(struct page *page)
*/
static inline void set_page_refcounted(struct page *page)
{
+ int refcnt;
+
VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(page_ref_count(page), page);
- set_page_count(page, 1);
+ refcnt = page_ref_inc_return(page);
+ VM_BUG_ON_PAGE(refcnt != 1, page);
}
extern unsigned long highest_memmap_pfn;
--
2.35.0.rc0.227.g00780c9af4-goog
set_page_count() is dangerous because it resets _refcount to an
arbitrary value. Instead, we now initialize _refcount to 1 only once,
and the rest of the time we use add/dec/cmpxchg, so the counter is
always modified relative to its current value and its history can be
tracked.
Remove set_page_count() and add new tracing hooks to page_ref_init().
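For example, a caller that used to reset the counter outright is
switched to a relative update; roughly like this (paraphrased from an
earlier patch in this series touching page_frag_alloc_align(), so the
exact hunk may differ):

	-	/* OK, page count is 0, we can safely set it */
	-	set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
	+	/* page count is 0, add PAGE_FRAG_CACHE_MAX_SIZE + 1 to it */
	+	page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);

Adjusting relative to the current value means an unexpected earlier or
concurrent modification is no longer silently overwritten, so it stays
observable to the new over/underflow checks and tracepoints.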
Signed-off-by: Pasha Tatashin <[email protected]>
---
include/linux/page_ref.h | 27 ++++++++-----------
include/trace/events/page_ref.h | 46 ++++++++++++++++++++++++++++-----
mm/debug_page_ref.c | 8 +++---
3 files changed, 54 insertions(+), 27 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 1af12a0d7ba1..d7316881626c 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -7,7 +7,7 @@
#include <linux/page-flags.h>
#include <linux/tracepoint-defs.h>
-DECLARE_TRACEPOINT(page_ref_set);
+DECLARE_TRACEPOINT(page_ref_init);
DECLARE_TRACEPOINT(page_ref_mod);
DECLARE_TRACEPOINT(page_ref_mod_and_test);
DECLARE_TRACEPOINT(page_ref_mod_and_return);
@@ -26,7 +26,7 @@ DECLARE_TRACEPOINT(page_ref_unfreeze);
*/
#define page_ref_tracepoint_active(t) tracepoint_enabled(t)
-extern void __page_ref_set(struct page *page, int v);
+extern void __page_ref_init(struct page *page);
extern void __page_ref_mod(struct page *page, int v);
extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
@@ -38,7 +38,7 @@ extern void __page_ref_unfreeze(struct page *page, int v);
#define page_ref_tracepoint_active(t) false
-static inline void __page_ref_set(struct page *page, int v)
+static inline void __page_ref_init(struct page *page)
{
}
static inline void __page_ref_mod(struct page *page, int v)
@@ -94,18 +94,6 @@ static inline int page_count(const struct page *page)
return folio_ref_count(page_folio(page));
}
-static inline void set_page_count(struct page *page, int v)
-{
- atomic_set(&page->_refcount, v);
- if (page_ref_tracepoint_active(page_ref_set))
- __page_ref_set(page, v);
-}
-
-static inline void folio_set_count(struct folio *folio, int v)
-{
- set_page_count(&folio->page, v);
-}
-
/*
* Setup the page refcount to one before being freed into the page allocator.
* The memory might not be initialized and therefore there cannot be any
@@ -116,7 +104,14 @@ static inline void folio_set_count(struct folio *folio, int v)
*/
static inline void page_ref_init(struct page *page)
{
- set_page_count(page, 1);
+ atomic_set(&page->_refcount, 1);
+ if (page_ref_tracepoint_active(page_ref_init))
+ __page_ref_init(page);
+}
+
+static inline void folio_ref_init(struct folio *folio)
+{
+ page_ref_init(&folio->page);
}
static inline int page_ref_add_return(struct page *page, int nr)
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index 8a99c1cd417b..87551bb1df9e 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -10,6 +10,45 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
+DECLARE_EVENT_CLASS(page_ref_init_template,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(unsigned long, flags)
+ __field(int, count)
+ __field(int, mapcount)
+ __field(void *, mapping)
+ __field(int, mt)
+ __field(int, val)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = page_to_pfn(page);
+ __entry->flags = page->flags;
+ __entry->count = page_ref_count(page);
+ __entry->mapcount = page_mapcount(page);
+ __entry->mapping = page->mapping;
+ __entry->mt = get_pageblock_migratetype(page);
+ ),
+
+ TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d",
+ __entry->pfn,
+ show_page_flags(__entry->flags & PAGEFLAGS_MASK),
+ __entry->count,
+ __entry->mapcount, __entry->mapping, __entry->mt)
+);
+
+DEFINE_EVENT(page_ref_init_template, page_ref_init,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
DECLARE_EVENT_CLASS(page_ref_mod_template,
TP_PROTO(struct page *page, int v),
@@ -44,13 +83,6 @@ DECLARE_EVENT_CLASS(page_ref_mod_template,
__entry->val)
);
-DEFINE_EVENT(page_ref_mod_template, page_ref_set,
-
- TP_PROTO(struct page *page, int v),
-
- TP_ARGS(page, v)
-);
-
DEFINE_EVENT(page_ref_mod_template, page_ref_mod,
TP_PROTO(struct page *page, int v),
diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c
index f3b2c9d3ece2..e32149734122 100644
--- a/mm/debug_page_ref.c
+++ b/mm/debug_page_ref.c
@@ -5,12 +5,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/page_ref.h>
-void __page_ref_set(struct page *page, int v)
+void __page_ref_init(struct page *page)
{
- trace_page_ref_set(page, v);
+ trace_page_ref_init(page);
}
-EXPORT_SYMBOL(__page_ref_set);
-EXPORT_TRACEPOINT_SYMBOL(page_ref_set);
+EXPORT_SYMBOL(__page_ref_init);
+EXPORT_TRACEPOINT_SYMBOL(page_ref_init);
void __page_ref_mod(struct page *page, int v)
{
--
2.35.0.rc0.227.g00780c9af4-goog
In page_ref_unfreeze() we set the new _refcount value after verifying
that the old value was indeed 0:
VM_BUG_ON_PAGE(page_count(page) != 0, page);
< the _refcount may change here>
atomic_set_release(&page->_refcount, count);
To avoid the small gap where _refcount may change, let's verify the
value of _refcount at the time of the set operation.
Use atomic_xchg_release() and verify at set time that the returned
old value was 0.
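A sketch of the window being closed (CPU labels hypothetical):

	CPU A: page_ref_unfreeze()              CPU B
	VM_BUG_ON_PAGE(page_count(page) != 0, page);
	  /* reads 0, check passes */
	                                        <changes _refcount>
	atomic_set_release(&page->_refcount, count);
	  /* overwrites CPU B's change, the bug goes unnoticed */

atomic_xchg_release() performs the store and the read of the old value
as one atomic operation, so anything that modified _refcount in that
window shows up in old_val and trips the VM_BUG_ON_PAGE().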
Signed-off-by: Pasha Tatashin <[email protected]>
---
include/linux/page_ref.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 243fc60ae6c8..9efabeff4e06 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -322,10 +322,9 @@ static inline int folio_ref_freeze(struct folio *folio, int count)
static inline void page_ref_unfreeze(struct page *page, int count)
{
- VM_BUG_ON_PAGE(page_count(page) != 0, page);
- VM_BUG_ON(count == 0);
+ int old_val = atomic_xchg_release(&page->_refcount, count);
- atomic_set_release(&page->_refcount, count);
+ VM_BUG_ON_PAGE(count == 0 || old_val != 0, page);
if (page_ref_tracepoint_active(page_ref_unfreeze))
__page_ref_unfreeze(page, count);
}
--
2.35.0.rc0.227.g00780c9af4-goog