Thomas spotted a nasty 32bit race in sched_clock_remote() after way too
many hours of debugging weirdness.
What happens is that sched_clock_remote() does regular machine word
reads of sched_clock_data::clock; this appears safe since we use
cmpxchg64() to update the variable and any half-read value would
trigger a retry.
Except we don't validate the new value 'val' in the same way! Thus we
can propagate non-atomic read errors into the clock value.
Cc: Ingo Molnar <[email protected]>
Cc: Steven Rostedt <[email protected]>
Debugged-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
---
kernel/sched/clock.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c685e31..7042ef7 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -170,6 +170,21 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
return clock;
}
+#ifndef CONFIG_64BIT
+/*
+ * 32bit machines can't atomically read a u64 except using cmpxchg64()
+ */
+static inline u64 scd_read_clock(struct sched_clock_data *scd)
+{
+ return cmpxchg64(&scd->clock, 0, 0);
+}
+#else
+static inline u64 scd_read_clock(struct sched_clock_data *scd)
+{
+ return scd->clock;
+}
+#endif
+
static u64 sched_clock_remote(struct sched_clock_data *scd)
{
struct sched_clock_data *my_scd = this_scd();
@@ -178,8 +193,8 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
sched_clock_local(my_scd);
again:
- this_clock = my_scd->clock;
- remote_clock = scd->clock;
+ this_clock = scd_clock_read(my_scd);
+ remote_clock = scd_clock_read(scd);
/*
* Use the opportunity that we have both locks
On Fri, 2013-04-05 at 18:36 +0200, Peter Zijlstra wrote:
> Thomas spotted a nasty 32bit race in sched_clock_remote() after way too
> many hours of debugging weirdness.
>
> What happens is that sched_clock_remote() does regular machine word
> reads of sched_clock_data::clock; this appears safe since we use
> cmpxchg64() to update the variable and any half-read value would
> trigger a retry.
>
> Except we don't validate the new value 'val' in the same way! Thus we
> can propagate non-atomic read errors into the clock value.
>
> Cc: Ingo Molnar <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Debugged-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
> kernel/sched/clock.c | 19 +++++++++++++++++--
> 1 file changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
> index c685e31..7042ef7 100644
> --- a/kernel/sched/clock.c
> +++ b/kernel/sched/clock.c
> @@ -170,6 +170,21 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
> return clock;
> }
>
> +#ifndef CONFIG_64BIT
We need to add a Kconfig:
config 32BIT
depends on BROKEN
Acked-by: Steven Rostedt <[email protected]>
-- Steve
> +/*
> + * 32bit machines can't atomically read a u64 except using cmpxchg64()
> + */
> +static inline u64 scd_read_clock(struct sched_clock_data *scd)
> +{
> + return cmpxchg64(&scd->clock, 0, 0);
> +}
> +#else
> +static inline u64 scd_read_clock(struct sched_clock_data *scd)
> +{
> + return scd->clock;
> +}
> +#endif
> +
> static u64 sched_clock_remote(struct sched_clock_data *scd)
> {
> struct sched_clock_data *my_scd = this_scd();
> @@ -178,8 +193,8 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
>
> sched_clock_local(my_scd);
> again:
> - this_clock = my_scd->clock;
> - remote_clock = scd->clock;
> + this_clock = scd_clock_read(my_scd);
> + remote_clock = scd_clock_read(scd);
>
> /*
> * Use the opportunity that we have both locks
>
On Fri, Apr 05, 2013 at 06:36:40PM +0200, Peter Zijlstra wrote:
> Thomas spotted a nasty 32bit race in sched_clock_remote() after way too
> many hours of debugging weirdness.
>
> What happens is that sched_clock_remote() does regular machine word
> reads of sched_clock_data::clock; this appears safe since we use
> cmpxchg64() to update the variable and any half-read value would
> trigger a retry.
>
> Except we don't validate the new value 'val' in the same way! Thus we
> can propagate non-atomic read errors into the clock value.
>
> Cc: Ingo Molnar <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Debugged-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
> kernel/sched/clock.c | 19 +++++++++++++++++--
> 1 file changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
> index c685e31..7042ef7 100644
> --- a/kernel/sched/clock.c
> +++ b/kernel/sched/clock.c
> @@ -170,6 +170,21 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
> return clock;
> }
>
> +#ifndef CONFIG_64BIT
> +/*
> + * 32bit machines can't atomically read a u64 except using cmpxchg64()
> + */
> +static inline u64 scd_read_clock(struct sched_clock_data *scd)
> +{
> + return cmpxchg64(&scd->clock, 0, 0);
> +}
> +#else
> +static inline u64 scd_read_clock(struct sched_clock_data *scd)
> +{
> + return scd->clock;
> +}
> +#endif
> +
> static u64 sched_clock_remote(struct sched_clock_data *scd)
> {
> struct sched_clock_data *my_scd = this_scd();
> @@ -178,8 +193,8 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
>
> sched_clock_local(my_scd);
> again:
> - this_clock = my_scd->clock;
> - remote_clock = scd->clock;
> + this_clock = scd_clock_read(my_scd);
> + remote_clock = scd_clock_read(scd);
^^^^^^^^^^^^^^
The name used here doesn't match the helper defined above: scd_read_clock().
Thanks,
Yong
>
> /*
> * Use the opportunity that we have both locks
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On Tue, 2013-04-09 at 22:55 +0800, Yong Zhang wrote:
> > + this_clock = scd_clock_read(my_scd);
> > + remote_clock = scd_clock_read(scd);
> ^^^^^^^^^^^^^^
> it doesn't match the declaration:
> scd_read_clock().
Yeah, I'm a moron and forgot to compile-test it or somesuch :-)
Anyway, Thomas wrote a much better patch, which made it in; see commit
a1cbcaa9ea87b87a96b9fc465951dcf36e459ca2.