When enabling KCSAN for kernel/sched (removing KCSAN_SANITIZE := n from
kernel/sched/Makefile), with CONFIG_DEBUG_PREEMPT=y, we can observe
recursion due to:
	check_access() [via instrumentation]
	  kcsan_setup_watchpoint()
	    reset_kcsan_skip()
	      kcsan_prandom_u32_max()
	        get_cpu_var()
	          preempt_disable()
	            preempt_count_add() [in kernel/sched/core.c]
	              check_access() [via instrumentation]
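
For reference, get_cpu_var() expands roughly as follows (a simplified
sketch of include/linux/percpu-defs.h; with CONFIG_DEBUG_PREEMPT,
preempt_disable() ends in the out-of-line preempt_count_add() in
kernel/sched/core.c, which becomes instrumented once kernel/sched is
built with KCSAN):

	#define get_cpu_var(var)					\
	(*({								\
		preempt_disable();	/* -> preempt_count_add() */	\
		this_cpu_ptr(&var);					\
	}))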
Avoid this by rewriting kcsan_prandom_u32_max() to use only the
non-tracing versions of preempt_disable() and preempt_enable(), which
do not call into scheduler code.
Note, while this currently does not affect an unmodified kernel, it'd be
good to keep a KCSAN kernel working when KCSAN_SANITIZE := n is removed
from kernel/sched/Makefile to permit testing scheduler code with KCSAN
if desired.
Fixes: cd290ec24633 ("kcsan: Use tracing-safe version of prandom")
Signed-off-by: Marco Elver <[email protected]>
---
v2:
* Update comment to also point out preempt_enable().
---
kernel/kcsan/core.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 3994a217bde7..10513f3e2349 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -284,10 +284,19 @@ should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *
  */
 static u32 kcsan_prandom_u32_max(u32 ep_ro)
 {
-	struct rnd_state *state = &get_cpu_var(kcsan_rand_state);
-	const u32 res = prandom_u32_state(state);
+	struct rnd_state *state;
+	u32 res;
+
+	/*
+	 * Avoid recursion with scheduler by using non-tracing versions of
+	 * preempt_disable() and preempt_enable() that do not call into
+	 * scheduler code.
+	 */
+	preempt_disable_notrace();
+	state = raw_cpu_ptr(&kcsan_rand_state);
+	res = prandom_u32_state(state);
+	preempt_enable_no_resched_notrace();
 
-	put_cpu_var(kcsan_rand_state);
 	return (u32)(((u64) res * ep_ro) >> 32);
 }
--
2.29.2.454.gaff20da3a2-goog
On Mon, Nov 23, 2020 at 02:23:00PM +0100, Marco Elver wrote:
> When enabling KCSAN for kernel/sched (removing KCSAN_SANITIZE := n from
> kernel/sched/Makefile), with CONFIG_DEBUG_PREEMPT=y, we can observe
> recursion due to:
>
> [...]
>
> +	preempt_disable_notrace();
> +	state = raw_cpu_ptr(&kcsan_rand_state);
> +	res = prandom_u32_state(state);
> +	preempt_enable_no_resched_notrace();

This is a preemption bug. Does preempt_enable_notrace() not work?
On Mon, Nov 23, 2020 at 04:57:46PM +0100, Marco Elver wrote:
> Let me know what you prefer.
>
> @@ -288,27 +288,19 @@ static u32 kcsan_prandom_u32_max(u32 ep_ro)
>  	u32 res;
>  
>  	/*
> -	 * Avoid recursion with scheduler by using non-tracing versions of
> -	 * preempt_disable() and preempt_enable() that do not call into
> -	 * scheduler code.
> +	 * Avoid recursion with scheduler by disabling KCSAN because
> +	 * preempt_enable_notrace() will still call into scheduler code.
>  	 */
> +	kcsan_disable_current();
>  	preempt_disable_notrace();
>  	state = raw_cpu_ptr(&kcsan_rand_state);
>  	res = prandom_u32_state(state);
> -	preempt_enable_no_resched_notrace();
> +	preempt_enable_notrace();
> +	kcsan_enable_current_nowarn();
>  
>  	return (u32)(((u64) res * ep_ro) >> 32);
>  }
This is much preferred over the other. The thing with _no_resched is that
you can miss a preemption for an unbounded amount of time, which is bad.
The _only_ valid use of _no_resched is when there's a call to schedule()
right after it.
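
I.e., a sketch of the only sane pattern:

	preempt_enable_no_resched();
	schedule();	/* the scheduling point follows immediately */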
On Mon, 23 Nov 2020 at 17:08, Peter Zijlstra <[email protected]> wrote:
> On Mon, Nov 23, 2020 at 04:57:46PM +0100, Marco Elver wrote:
> > Let me know what you prefer.
> >
>
> > [...]
>
> This is much preferred over the other. The thing with _no_resched is that
> you can miss a preemption for an unbounded amount of time, which is bad.
Ah, I think this is rubbish, too. It might fix one problem, but now I'm
left with the problem that kcsan_prandom_u32_max() is also called for
the udelay() later, and at that point we lose skip_count randomness
entirely.

I think relying on lib/random32.c has already caused too many
headaches, so I'm tempted to just get rid of that dependency entirely,
and instead do the simplest possible thing, which might just be calling
get_cycles() (all we need is to introduce some non-determinism).
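
Something like the following, perhaps (an untested sketch; get_cycles()
is from <linux/timex.h>):

	static u32 kcsan_prandom_u32_max(u32 ep_ro)
	{
		/* Cheap non-determinism: no per-CPU state, no preempt games. */
		u32 res = (u32)get_cycles();

		/* Multiply-shift maps the full u32 range onto [0, ep_ro). */
		return (u32)(((u64) res * ep_ro) >> 32);
	}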
On Mon, 23 Nov 2020 at 14:55, Peter Zijlstra <[email protected]> wrote:
> On Mon, Nov 23, 2020 at 02:23:00PM +0100, Marco Elver wrote:
> > [...]
> >
> > +	preempt_disable_notrace();
> > +	state = raw_cpu_ptr(&kcsan_rand_state);
> > +	res = prandom_u32_state(state);
> > +	preempt_enable_no_resched_notrace();
>
> This is a preemption bug. Does preempt_enable_notrace() not work?

No, it didn't, because we end up calling preempt_schedule_notrace(),
which again might end in recursion.

Normally we could surround this with
kcsan_disable_current()/kcsan_enable_current(), but that doesn't work,
because we have this sequence:

	reset_kcsan_skip();

	if (!kcsan_is_enabled())
		...

to avoid underflowing the skip counter if KCSAN is disabled. That
could be solved by writing to the skip counter twice: once with a
non-random value, and, if KCSAN is enabled, again with a random value.
Would that be better?
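
Concretely, something like this in kcsan_setup_watchpoint() (untested
sketch):

	/* Plain reset first, so the counter never underflows ... */
	this_cpu_write(kcsan_skip, kcsan_skip_watch);

	if (!kcsan_is_enabled())
		goto out;

	/* ... and re-randomize only if it is safe to call into prandom. */
	this_cpu_write(kcsan_skip, kcsan_skip_watch -
				   kcsan_prandom_u32_max(kcsan_skip_watch));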
And I'd like to avoid adding __no_kcsan to scheduler functions.
Any recommendation?
Thanks,
-- Marco
On Mon, Nov 23, 2020 at 04:17PM +0100, Marco Elver wrote:
> On Mon, 23 Nov 2020 at 14:55, Peter Zijlstra <[email protected]> wrote:
> > [...]
> >
> > This is a preemption bug. Does preempt_enable_notrace() not work?
>
> No, it didn't, because we end up calling preempt_schedule_notrace(),
> which again might end in recursion.
> [...]
> That could be solved by writing to the skip counter twice: once with a
> non-random value, and, if KCSAN is enabled, again with a random value.
> Would that be better?
See below for a concrete alternative that works.
> And I'd like to avoid adding __no_kcsan to scheduler functions.
>
> Any recommendation?
Let me know what you prefer.
Thanks,
-- Marco
------ >8 ------
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 10513f3e2349..c8eadef3f42a 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -266,8 +266,8 @@ should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *
 		return false;
 
 	/*
-	 * NOTE: If we get here, kcsan_skip must always be reset in slow path
-	 * via reset_kcsan_skip() to avoid underflow.
+	 * Note: If we get here, kcsan_skip must always be reset in slow path to
+	 * avoid underflow.
 	 */
 
 	/* this operation should be watched */
@@ -288,27 +288,19 @@ static u32 kcsan_prandom_u32_max(u32 ep_ro)
 	u32 res;
 
 	/*
-	 * Avoid recursion with scheduler by using non-tracing versions of
-	 * preempt_disable() and preempt_enable() that do not call into
-	 * scheduler code.
+	 * Avoid recursion with scheduler by disabling KCSAN because
+	 * preempt_enable_notrace() will still call into scheduler code.
 	 */
+	kcsan_disable_current();
 	preempt_disable_notrace();
 	state = raw_cpu_ptr(&kcsan_rand_state);
 	res = prandom_u32_state(state);
-	preempt_enable_no_resched_notrace();
+	preempt_enable_notrace();
+	kcsan_enable_current_nowarn();
 
 	return (u32)(((u64) res * ep_ro) >> 32);
 }
 
-static inline void reset_kcsan_skip(void)
-{
-	long skip_count = kcsan_skip_watch -
-			  (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
-				   kcsan_prandom_u32_max(kcsan_skip_watch) :
-				   0);
-	this_cpu_write(kcsan_skip, skip_count);
-}
-
 static __always_inline bool kcsan_is_enabled(void)
 {
 	return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
@@ -430,10 +422,16 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 	 * Always reset kcsan_skip counter in slow-path to avoid underflow; see
 	 * should_watch().
 	 */
-	reset_kcsan_skip();
-
-	if (!kcsan_is_enabled())
+	if (likely(kcsan_is_enabled())) {
+		long skip_count = kcsan_skip_watch -
+				  (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
+					   kcsan_prandom_u32_max(kcsan_skip_watch) :
+					   0);
+		this_cpu_write(kcsan_skip, skip_count);
+	} else {
+		this_cpu_write(kcsan_skip, kcsan_skip_watch);
 		goto out;
+	}
 
 	/*
 	 * Special atomic rules: unlikely to be true, so we check them here in
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 5fc9c9b70862..21fb5a5662b5 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -7,12 +7,6 @@ endif
 # that is not a function of syscall inputs. E.g. involuntary context switches.
 KCOV_INSTRUMENT := n
 
-# There are numerous data races here, however, most of them are due to plain accesses.
-# This would make it even harder for syzbot to find reproducers, because these
-# bugs trigger without specific input. Disable by default, but should re-enable
-# eventually.
-KCSAN_SANITIZE := n
-
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer is
 # needed for x86 only. Why this used to be enabled for all architectures is beyond