Instead of removing the fault handling portion of the stack trace based
on the fault handler's name, just use struct pt_regs directly.
Change kfence_handle_page_fault() to take a struct pt_regs, and plumb it
through to kfence_report_error() for out-of-bounds, use-after-free, or
invalid access errors, where pt_regs is used to generate the stack
trace.
If the kernel is a DEBUG_KERNEL, also show registers for more
information.
Suggested-by: Mark Rutland <[email protected]>
Signed-off-by: Marco Elver <[email protected]>
---
 arch/arm64/include/asm/kfence.h |  2 --
 arch/arm64/mm/fault.c           |  2 +-
 arch/x86/include/asm/kfence.h   |  6 ----
 arch/x86/mm/fault.c             |  2 +-
 include/linux/kfence.h          |  5 +--
 mm/kfence/core.c                | 10 +++---
 mm/kfence/kfence.h              |  4 +--
 mm/kfence/report.c              | 63 +++++++++++++++++++--------------
 8 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
index 5ac0f599cc9a..6c0afeeab635 100644
--- a/arch/arm64/include/asm/kfence.h
+++ b/arch/arm64/include/asm/kfence.h
@@ -5,8 +5,6 @@
#include <asm/cacheflush.h>
-#define KFENCE_SKIP_ARCH_FAULT_HANDLER "el1_sync"
-
static inline bool arch_kfence_init_pool(void) { return true; }
static inline bool kfence_protect_page(unsigned long addr, bool protect)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2d60204b4ed2..183d1e6dd9e0 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -323,7 +323,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
} else if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
} else {
- if (kfence_handle_page_fault(addr))
+ if (kfence_handle_page_fault(addr, regs))
return;
msg = "paging request";
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
index beeac105dae7..2f3f877a7a5c 100644
--- a/arch/x86/include/asm/kfence.h
+++ b/arch/x86/include/asm/kfence.h
@@ -11,12 +11,6 @@
#include <asm/set_memory.h>
#include <asm/tlbflush.h>
-/*
- * The page fault handler entry function, up to which the stack trace is
- * truncated in reports.
- */
-#define KFENCE_SKIP_ARCH_FAULT_HANDLER "asm_exc_page_fault"
-
/* Force 4K pages for __kfence_pool. */
static inline bool arch_kfence_init_pool(void)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e42db2836438..53d732161b4f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -727,7 +727,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
efi_recover_from_page_fault(address);
/* Only not-present faults should be handled by KFENCE. */
- if (!(error_code & X86_PF_PROT) && kfence_handle_page_fault(address))
+ if (!(error_code & X86_PF_PROT) && kfence_handle_page_fault(address, regs))
return;
oops:
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index ed2d48acdafe..98a97f9d43cd 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -171,6 +171,7 @@ static __always_inline __must_check bool kfence_free(void *addr)
/**
* kfence_handle_page_fault() - perform page fault handling for KFENCE pages
* @addr: faulting address
+ * @regs: current struct pt_regs (can be NULL, but shows full stack trace)
*
* Return:
* * false - address outside KFENCE pool,
@@ -181,7 +182,7 @@ static __always_inline __must_check bool kfence_free(void *addr)
* cases KFENCE prints an error message and marks the offending page as
* present, so that the kernel can proceed.
*/
-bool __must_check kfence_handle_page_fault(unsigned long addr);
+bool __must_check kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs);
#else /* CONFIG_KFENCE */
@@ -194,7 +195,7 @@ static inline size_t kfence_ksize(const void *addr) { return 0; }
static inline void *kfence_object_start(const void *addr) { return NULL; }
static inline void __kfence_free(void *addr) { }
static inline bool __must_check kfence_free(void *addr) { return false; }
-static inline bool __must_check kfence_handle_page_fault(unsigned long addr) { return false; }
+static inline bool __must_check kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs) { return false; }
#endif
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 9d597013cd5d..9358f42a9a9e 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -212,7 +212,7 @@ static inline bool check_canary_byte(u8 *addr)
return true;
atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
- kfence_report_error((unsigned long)addr, addr_to_metadata((unsigned long)addr),
+ kfence_report_error((unsigned long)addr, NULL, addr_to_metadata((unsigned long)addr),
KFENCE_ERROR_CORRUPTION);
return false;
}
@@ -351,7 +351,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
/* Invalid or double-free, bail out. */
atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
- kfence_report_error((unsigned long)addr, meta, KFENCE_ERROR_INVALID_FREE);
+ kfence_report_error((unsigned long)addr, NULL, meta, KFENCE_ERROR_INVALID_FREE);
raw_spin_unlock_irqrestore(&meta->lock, flags);
return;
}
@@ -752,7 +752,7 @@ void __kfence_free(void *addr)
kfence_guarded_free(addr, meta, false);
}
-bool kfence_handle_page_fault(unsigned long addr)
+bool kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs)
{
const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
struct kfence_metadata *to_report = NULL;
@@ -815,11 +815,11 @@ bool kfence_handle_page_fault(unsigned long addr)
out:
if (to_report) {
- kfence_report_error(addr, to_report, error_type);
+ kfence_report_error(addr, regs, to_report, error_type);
raw_spin_unlock_irqrestore(&to_report->lock, flags);
} else {
/* This may be a UAF or OOB access, but we can't be sure. */
- kfence_report_error(addr, NULL, KFENCE_ERROR_INVALID);
+ kfence_report_error(addr, regs, NULL, KFENCE_ERROR_INVALID);
}
return kfence_unprotect(addr); /* Unprotect and let access proceed. */
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
index f115aabc2052..fa3579d03089 100644
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -99,8 +99,8 @@ enum kfence_error_type {
KFENCE_ERROR_INVALID_FREE, /* Invalid free. */
};
-void kfence_report_error(unsigned long address, const struct kfence_metadata *meta,
- enum kfence_error_type type);
+void kfence_report_error(unsigned long address, struct pt_regs *regs,
+ const struct kfence_metadata *meta, enum kfence_error_type type);
void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta);
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 0fdaa3ddf1b4..4dedc2ff8f28 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -5,6 +5,7 @@
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/printk.h>
+#include <linux/sched/debug.h>
#include <linux/seq_file.h>
#include <linux/stacktrace.h>
#include <linux/string.h>
@@ -36,7 +37,6 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
{
char buf[64];
int skipnr, fallback = 0;
- bool is_access_fault = false;
if (type) {
/* Depending on error type, find different stack entries. */
@@ -44,8 +44,12 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
case KFENCE_ERROR_UAF:
case KFENCE_ERROR_OOB:
case KFENCE_ERROR_INVALID:
- is_access_fault = true;
- break;
+ /*
+ * kfence_handle_page_fault() may be called with pt_regs
+ * set to NULL; in that case we'll simply show the full
+ * stack trace.
+ */
+ return 0;
case KFENCE_ERROR_CORRUPTION:
case KFENCE_ERROR_INVALID_FREE:
break;
@@ -55,26 +59,21 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
for (skipnr = 0; skipnr < num_entries; skipnr++) {
int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
- if (is_access_fault) {
- if (!strncmp(buf, KFENCE_SKIP_ARCH_FAULT_HANDLER, len))
- goto found;
- } else {
- if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
- !strncmp(buf, "__slab_free", len)) {
- /*
- * In case of tail calls from any of the below
- * to any of the above.
- */
- fallback = skipnr + 1;
- }
-
- /* Also the *_bulk() variants by only checking prefixes. */
- if (str_has_prefix(buf, "kfree") ||
- str_has_prefix(buf, "kmem_cache_free") ||
- str_has_prefix(buf, "__kmalloc") ||
- str_has_prefix(buf, "kmem_cache_alloc"))
- goto found;
+ if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
+ !strncmp(buf, "__slab_free", len)) {
+ /*
+ * In case of tail calls from any of the below
+ * to any of the above.
+ */
+ fallback = skipnr + 1;
}
+
+ /* Also the *_bulk() variants by only checking prefixes. */
+ if (str_has_prefix(buf, "kfree") ||
+ str_has_prefix(buf, "kmem_cache_free") ||
+ str_has_prefix(buf, "__kmalloc") ||
+ str_has_prefix(buf, "kmem_cache_alloc"))
+ goto found;
}
if (fallback < num_entries)
return fallback;
@@ -152,13 +151,20 @@ static void print_diff_canary(unsigned long address, size_t bytes_to_show,
pr_cont(" ]");
}
-void kfence_report_error(unsigned long address, const struct kfence_metadata *meta,
- enum kfence_error_type type)
+void kfence_report_error(unsigned long address, struct pt_regs *regs,
+ const struct kfence_metadata *meta, enum kfence_error_type type)
{
unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 };
- int num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 1);
- int skipnr = get_stack_skipnr(stack_entries, num_stack_entries, &type);
const ptrdiff_t object_index = meta ? meta - kfence_metadata : -1;
+ int num_stack_entries;
+ int skipnr = 0;
+
+ if (regs) {
+ num_stack_entries = stack_trace_save_regs(regs, stack_entries, KFENCE_STACK_DEPTH, 0);
+ } else {
+ num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 1);
+ skipnr = get_stack_skipnr(stack_entries, num_stack_entries, &type);
+ }
/* Require non-NULL meta, except if KFENCE_ERROR_INVALID. */
if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta))
@@ -222,7 +228,10 @@ void kfence_report_error(unsigned long address, const struct kfence_metadata *me
/* Print report footer. */
pr_err("\n");
- dump_stack_print_info(KERN_ERR);
+ if (IS_ENABLED(CONFIG_DEBUG_KERNEL) && regs)
+ show_regs(regs);
+ else
+ dump_stack_print_info(KERN_ERR);
pr_err("==================================================================\n");
lockdep_on();
--
2.29.1.341.ge80a0c044ae-goog
On Thu, Nov 05, 2020 at 10:21:33AM +0100, Marco Elver wrote:
> Instead of removing the fault handling portion of the stack trace based
> on the fault handler's name, just use struct pt_regs directly.
>
> Change kfence_handle_page_fault() to take a struct pt_regs, and plumb it
> through to kfence_report_error() for out-of-bounds, use-after-free, or
> invalid access errors, where pt_regs is used to generate the stack
> trace.
>
> If the kernel is a DEBUG_KERNEL, also show registers for more
> information.
>
> Suggested-by: Mark Rutland <[email protected]>
> Signed-off-by: Marco Elver <[email protected]>
Wow; I wasn't expecting this to be put together so quickly, thanks for
doing this!
From a scan, this looks good to me -- just one question below.
> diff --git a/include/linux/kfence.h b/include/linux/kfence.h
> index ed2d48acdafe..98a97f9d43cd 100644
> --- a/include/linux/kfence.h
> +++ b/include/linux/kfence.h
> @@ -171,6 +171,7 @@ static __always_inline __must_check bool kfence_free(void *addr)
> /**
> * kfence_handle_page_fault() - perform page fault handling for KFENCE pages
> * @addr: faulting address
> + * @regs: current struct pt_regs (can be NULL, but shows full stack trace)
> *
> * Return:
> * * false - address outside KFENCE pool,
> @@ -44,8 +44,12 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
> case KFENCE_ERROR_UAF:
> case KFENCE_ERROR_OOB:
> case KFENCE_ERROR_INVALID:
> - is_access_fault = true;
> - break;
> + /*
> + * kfence_handle_page_fault() may be called with pt_regs
> + * set to NULL; in that case we'll simply show the full
> + * stack trace.
> + */
> + return 0;
For both the above comments, when/where is kfence_handle_page_fault()
called with regs set to NULL? I couldn't spot that in this patch, so
unless I missed it I'm guessing that's somewhere outside of the patch
context?
If this is a case we don't expect to happen, maybe add a WARN_ON_ONCE()?
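Something like the below is what I have in mind (an untested sketch
only; whether to bail out or merely warn is your call), at the start of
kfence_handle_page_fault():

	/* Sketch: we (currently) never expect a NULL regs here. */
	if (WARN_ON_ONCE(!regs))
		return false;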
Thanks,
Mark.
On Thu, 5 Nov 2020 at 11:52, Mark Rutland <[email protected]> wrote:
> On Thu, Nov 05, 2020 at 10:21:33AM +0100, Marco Elver wrote:
> > Instead of removing the fault handling portion of the stack trace based
> > on the fault handler's name, just use struct pt_regs directly.
> >
> > Change kfence_handle_page_fault() to take a struct pt_regs, and plumb it
> > through to kfence_report_error() for out-of-bounds, use-after-free, or
> > invalid access errors, where pt_regs is used to generate the stack
> > trace.
> >
> > If the kernel is a DEBUG_KERNEL, also show registers for more
> > information.
> >
> > Suggested-by: Mark Rutland <[email protected]>
> > Signed-off-by: Marco Elver <[email protected]>
>
> Wow; I wasn't expecting this to be put together so quickly, thanks for
> doing this!
>
> From a scan, this looks good to me -- just one question below.
>
> > diff --git a/include/linux/kfence.h b/include/linux/kfence.h
> > index ed2d48acdafe..98a97f9d43cd 100644
> > --- a/include/linux/kfence.h
> > +++ b/include/linux/kfence.h
> > @@ -171,6 +171,7 @@ static __always_inline __must_check bool kfence_free(void *addr)
> > /**
> > * kfence_handle_page_fault() - perform page fault handling for KFENCE pages
> > * @addr: faulting address
> > + * @regs: current struct pt_regs (can be NULL, but shows full stack trace)
> > *
> > * Return:
> > * * false - address outside KFENCE pool,
>
> > @@ -44,8 +44,12 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
> > case KFENCE_ERROR_UAF:
> > case KFENCE_ERROR_OOB:
> > case KFENCE_ERROR_INVALID:
> > - is_access_fault = true;
> > - break;
> > + /*
> > + * kfence_handle_page_fault() may be called with pt_regs
> > + * set to NULL; in that case we'll simply show the full
> > + * stack trace.
> > + */
> > + return 0;
>
> For both the above comments, when/where is kfence_handle_page_fault()
> called with regs set to NULL? I couldn't spot that in this patch, so
> unless I missed it I'm guessing that's somewhere outside of the patch
> context?
Right, currently it's not expected to happen, but I'd like to permit
calling this function from outside fault handlers, for use-cases like
this:
https://lkml.kernel.org/r/CANpmjNNxAvembOetv15FfZ=04mpj0Qwx+1tnn22tABaHHRRv=Q@mail.gmail.com
The revised recommendation when trying to get KFENCE to give us more
information about allocation/free stacks after refcount underflow
(like what Paul was trying to do) would be to call
kfence_handle_page_fault(addr, NULL).
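Roughly like so (just a sketch -- 'obj' and the surrounding debugging
context are hypothetical, not part of this patch):

	/* Have KFENCE report alloc/free stacks for a suspect address. */
	if (is_kfence_address(obj) &&
	    !kfence_handle_page_fault((unsigned long)obj, NULL))
		pr_warn("KFENCE could not report on %px\n", obj);

With regs == NULL, the report simply falls back to stack_trace_save()
for the access stack trace rather than truncating at a fault handler.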
> If this is a case we don't expect to happen, maybe add a WARN_ON_ONCE()?
While it's currently not expected, I don't see why we should make this
WARN and limit the potential uses of the API when it works just fine
with regs set to NULL. Although the name kfence_handle_page_fault()
is arguably confusing for such uses, for now, until more widespread
use is evident (if at all), I'd say let's keep it as-is and simply not
prevent such use-cases.
Thanks,
-- Marco
On Thu, Nov 05, 2020 at 12:11:19PM +0100, Marco Elver wrote:
> On Thu, 5 Nov 2020 at 11:52, Mark Rutland <[email protected]> wrote:
> > On Thu, Nov 05, 2020 at 10:21:33AM +0100, Marco Elver wrote:
> > > Instead of removing the fault handling portion of the stack trace based
> > > on the fault handler's name, just use struct pt_regs directly.
> > >
> > > Change kfence_handle_page_fault() to take a struct pt_regs, and plumb it
> > > through to kfence_report_error() for out-of-bounds, use-after-free, or
> > > invalid access errors, where pt_regs is used to generate the stack
> > > trace.
> > >
> > > If the kernel is a DEBUG_KERNEL, also show registers for more
> > > information.
> > >
> > > Suggested-by: Mark Rutland <[email protected]>
> > > Signed-off-by: Marco Elver <[email protected]>
> >
> > Wow; I wasn't expecting this to be put together so quickly, thanks for
> > doing this!
> >
> > From a scan, this looks good to me -- just one question below.
> >
> > > diff --git a/include/linux/kfence.h b/include/linux/kfence.h
> > > index ed2d48acdafe..98a97f9d43cd 100644
> > > --- a/include/linux/kfence.h
> > > +++ b/include/linux/kfence.h
> > > @@ -171,6 +171,7 @@ static __always_inline __must_check bool kfence_free(void *addr)
> > > /**
> > > * kfence_handle_page_fault() - perform page fault handling for KFENCE pages
> > > * @addr: faulting address
> > > + * @regs: current struct pt_regs (can be NULL, but shows full stack trace)
> > > *
> > > * Return:
> > > * * false - address outside KFENCE pool,
> >
> > > @@ -44,8 +44,12 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
> > > case KFENCE_ERROR_UAF:
> > > case KFENCE_ERROR_OOB:
> > > case KFENCE_ERROR_INVALID:
> > > - is_access_fault = true;
> > > - break;
> > > + /*
> > > + * kfence_handle_page_fault() may be called with pt_regs
> > > + * set to NULL; in that case we'll simply show the full
> > > + * stack trace.
> > > + */
> > > + return 0;
> >
> > For both the above comments, when/where is kfence_handle_page_fault()
> > called with regs set to NULL? I couldn't spot that in this patch, so
> > unless I missed it I'm guessing that's somewhere outside of the patch
> > context?
>
> Right, currently it's not expected to happen, but I'd like to permit
> calling this function from outside fault handlers, for use-cases like
> this:
>
> https://lkml.kernel.org/r/CANpmjNNxAvembOetv15FfZ=04mpj0Qwx+1tnn22tABaHHRRv=Q@mail.gmail.com
>
> The revised recommendation when trying to get KFENCE to give us more
> information about allocation/free stacks after refcount underflow
> (like what Paul was trying to do) would be to call
> kfence_handle_page_fault(addr, NULL).
>
> > If this is a case we don't expect to happen, maybe add a WARN_ON_ONCE()?
>
> While it's currently not expected, I don't see why we should make this
> WARN and limit the potential uses of the API when it works just fine
> with regs set to NULL. Although the name kfence_handle_page_fault()
> is arguably confusing for such uses, for now, until more widespread
> use is evident (if at all), I'd say let's keep it as-is and simply not
> prevent such use-cases.
Fair enough! I guess in future we could always revise that anyhow.
FWIW, for this as-is:
Acked-by: Mark Rutland <[email protected]>
Mark.