The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6
because clint_time_val is used even before CLINT driver is probed
at following places:
1. rand_initialize() calls get_cycles() which in-turn uses
clint_time_val
2. boot_init_stack_canary() calls get_cycles() which in-turn
uses clint_time_val
The issue#1 (above) is fixed by providing custom random_get_entropy()
for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of
boot_init_stack_canary() on get_cycles() and this is aligned with the
boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel.
Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation
for M-mode systems")
Signed-off-by: Palmer Dabbelt <[email protected]>
Signed-off-by: Anup Patel <[email protected]>
---
Changes since v2:
- Take different approach and provide custom random_get_entropy() for
RISC-V NoMMU kernel
- Remove dependency of boot_init_stack_canary() on get_cycles()
- Hopefully we don't require to set clint_time_val = NULL in CLINT
driver with a different approach to fix.
Changes since v1:
- Explicitly initialize clint_time_val to NULL in CLINT driver to
avoid hang on Kendryte K210
---
arch/riscv/include/asm/stackprotector.h | 4 ----
arch/riscv/include/asm/timex.h | 13 +++++++++++++
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
index d95f7b2a7f37..5962f8891f06 100644
--- a/arch/riscv/include/asm/stackprotector.h
+++ b/arch/riscv/include/asm/stackprotector.h
@@ -5,7 +5,6 @@
#include <linux/random.h>
#include <linux/version.h>
-#include <asm/timex.h>
extern unsigned long __stack_chk_guard;
@@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard;
static __always_inline void boot_init_stack_canary(void)
{
unsigned long canary;
- unsigned long tsc;
/* Try to get a semi random initial value. */
get_random_bytes(&canary, sizeof(canary));
- tsc = get_cycles();
- canary += tsc + (tsc << BITS_PER_LONG/2);
canary ^= LINUX_VERSION_CODE;
canary &= CANARY_MASK;
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 7f659dda0032..ab104905d4db 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void)
#define get_cycles_hi get_cycles_hi
#endif /* CONFIG_64BIT */
+/*
+ * Much like MIPS, we may not have a viable counter to use at an early point
+ * in the boot process. Unfortunately we don't have a fallback, so instead
+ * we just return 0.
+ */
+static inline unsigned long random_get_entropy(void)
+{
+ if (unlikely(clint_time_val == NULL))
+ return 0;
+ return get_cycles();
+}
+#define random_get_entropy() random_get_entropy()
+
#else /* CONFIG_RISCV_M_MODE */
static inline cycles_t get_cycles(void)
--
2.25.1
On Sat, 26 Sep 2020 22:39:16 PDT (-0700), Anup Patel wrote:
> The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6
> because clint_time_val is used even before CLINT driver is probed
> at following places:
> 1. rand_initialize() calls get_cycles() which in-turn uses
> clint_time_val
> 2. boot_init_stack_canary() calls get_cycles() which in-turn
> uses clint_time_val
>
> The issue#1 (above) is fixed by providing custom random_get_entropy()
> for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of
> boot_init_stack_canary() on get_cycles() and this is aligned with the
> boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel.
>
> Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation
> for M-mode systems")
> Signed-off-by: Palmer Dabbelt <[email protected]>
> Signed-off-by: Anup Patel <[email protected]>
> ---
> Changes since v2:
> - Take different approach and provide custom random_get_entropy() for
> RISC-V NoMMU kernel
> - Remove dependency of boot_init_stack_canary() on get_cycles()
> - Hopefully we don't require to set clint_time_val = NULL in CLINT
> driver with a different approach to fix.
> Changes since v1:
> - Explicitly initialize clint_time_val to NULL in CLINT driver to
> avoid hang on Kendryte K210
> ---
> arch/riscv/include/asm/stackprotector.h | 4 ----
> arch/riscv/include/asm/timex.h | 13 +++++++++++++
> 2 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
> index d95f7b2a7f37..5962f8891f06 100644
> --- a/arch/riscv/include/asm/stackprotector.h
> +++ b/arch/riscv/include/asm/stackprotector.h
> @@ -5,7 +5,6 @@
>
> #include <linux/random.h>
> #include <linux/version.h>
> -#include <asm/timex.h>
>
> extern unsigned long __stack_chk_guard;
>
> @@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard;
> static __always_inline void boot_init_stack_canary(void)
> {
> unsigned long canary;
> - unsigned long tsc;
>
> /* Try to get a semi random initial value. */
> get_random_bytes(&canary, sizeof(canary));
> - tsc = get_cycles();
> - canary += tsc + (tsc << BITS_PER_LONG/2);
> canary ^= LINUX_VERSION_CODE;
> canary &= CANARY_MASK;
>
> diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
> index 7f659dda0032..ab104905d4db 100644
> --- a/arch/riscv/include/asm/timex.h
> +++ b/arch/riscv/include/asm/timex.h
> @@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void)
> #define get_cycles_hi get_cycles_hi
> #endif /* CONFIG_64BIT */
>
> +/*
> + * Much like MIPS, we may not have a viable counter to use at an early point
> + * in the boot process. Unfortunately we don't have a fallback, so instead
> + * we just return 0.
> + */
> +static inline unsigned long random_get_entropy(void)
> +{
> + if (unlikely(clint_time_val == NULL))
> + return 0;
> + return get_cycles();
> +}
> +#define random_get_entropy() random_get_entropy()
> +
> #else /* CONFIG_RISCV_M_MODE */
>
> static inline cycles_t get_cycles(void)
Reviewed-by: Palmer Dabbelt <[email protected]>
I'm going to wait for Damien to chime in about the NULL initialization boot
failure, though, as I'm a bit worried something else was going on.
Thanks!
On Sat, 2020-09-26 at 22:46 -0700, Palmer Dabbelt wrote:
> On Sat, 26 Sep 2020 22:39:16 PDT (-0700), Anup Patel wrote:
> > The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6
> > because clint_time_val is used even before CLINT driver is probed
> > at following places:
> > 1. rand_initialize() calls get_cycles() which in-turn uses
> > clint_time_val
> > 2. boot_init_stack_canary() calls get_cycles() which in-turn
> > uses clint_time_val
> >
> > The issue#1 (above) is fixed by providing custom random_get_entropy()
> > for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of
> > boot_init_stack_canary() on get_cycles() and this is aligned with the
> > boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel.
> >
> > Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation
> > for M-mode systems")
> > Signed-off-by: Palmer Dabbelt <[email protected]>
> > Signed-off-by: Anup Patel <[email protected]>
> > ---
> > Changes since v2:
> > - Take different approach and provide custom random_get_entropy() for
> > RISC-V NoMMU kernel
> > - Remove dependency of boot_init_stack_canary() on get_cycles()
> > - Hopefully we don't require to set clint_time_val = NULL in CLINT
> > driver with a different approach to fix.
> > Changes since v1:
> > - Explicitly initialize clint_time_val to NULL in CLINT driver to
> > avoid hang on Kendryte K210
> > ---
> > arch/riscv/include/asm/stackprotector.h | 4 ----
> > arch/riscv/include/asm/timex.h | 13 +++++++++++++
> > 2 files changed, 13 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
> > index d95f7b2a7f37..5962f8891f06 100644
> > --- a/arch/riscv/include/asm/stackprotector.h
> > +++ b/arch/riscv/include/asm/stackprotector.h
> > @@ -5,7 +5,6 @@
> >
> > #include <linux/random.h>
> > #include <linux/version.h>
> > -#include <asm/timex.h>
> >
> > extern unsigned long __stack_chk_guard;
> >
> > @@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard;
> > static __always_inline void boot_init_stack_canary(void)
> > {
> > unsigned long canary;
> > - unsigned long tsc;
> >
> > /* Try to get a semi random initial value. */
> > get_random_bytes(&canary, sizeof(canary));
> > - tsc = get_cycles();
> > - canary += tsc + (tsc << BITS_PER_LONG/2);
> > canary ^= LINUX_VERSION_CODE;
> > canary &= CANARY_MASK;
> >
> > diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
> > index 7f659dda0032..ab104905d4db 100644
> > --- a/arch/riscv/include/asm/timex.h
> > +++ b/arch/riscv/include/asm/timex.h
> > @@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void)
> > #define get_cycles_hi get_cycles_hi
> > #endif /* CONFIG_64BIT */
> >
> > +/*
> > + * Much like MIPS, we may not have a viable counter to use at an early point
> > + * in the boot process. Unfortunately we don't have a fallback, so instead
> > + * we just return 0.
> > + */
> > +static inline unsigned long random_get_entropy(void)
> > +{
> > + if (unlikely(clint_time_val == NULL))
> > + return 0;
> > + return get_cycles();
> > +}
> > +#define random_get_entropy() random_get_entropy()
> > +
> > #else /* CONFIG_RISCV_M_MODE */
> >
> > static inline cycles_t get_cycles(void)
>
> Reviewed-by: Palmer Dabbelt <[email protected]>
>
> I'm going to wait for Damien to chime in about the NULL initialization boot
> failure, though, as I'm a bit worried something else was going on.
>
> Thanks!
For Kendryte, no problems. Boots correctly.
Tested-by: Damien Le Moal <[email protected]>
--
Damien Le Moal
Western Digital
On Sun, 2020-09-27 at 11:09 +0530, Anup Patel wrote:
> The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6
> because clint_time_val is used even before CLINT driver is probed
> at following places:
> 1. rand_initialize() calls get_cycles() which in-turn uses
> clint_time_val
> 2. boot_init_stack_canary() calls get_cycles() which in-turn
> uses clint_time_val
>
> The issue#1 (above) is fixed by providing custom random_get_entropy()
> for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of
> boot_init_stack_canary() on get_cycles() and this is aligned with the
> boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel.
>
> Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation
> for M-mode systems")
> Signed-off-by: Palmer Dabbelt <[email protected]>
> Signed-off-by: Anup Patel <[email protected]>
> ---
> Changes since v2:
> - Take different approach and provide custom random_get_entropy() for
> RISC-V NoMMU kernel
> - Remove dependency of boot_init_stack_canary() on get_cycles()
> - Hopefully we don't require to set clint_time_val = NULL in CLINT
> driver with a different approach to fix.
> Changes since v1:
> - Explicitly initialize clint_time_val to NULL in CLINT driver to
> avoid hang on Kendryte K210
> ---
> arch/riscv/include/asm/stackprotector.h | 4 ----
> arch/riscv/include/asm/timex.h | 13 +++++++++++++
> 2 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
> index d95f7b2a7f37..5962f8891f06 100644
> --- a/arch/riscv/include/asm/stackprotector.h
> +++ b/arch/riscv/include/asm/stackprotector.h
> @@ -5,7 +5,6 @@
>
> #include <linux/random.h>
> #include <linux/version.h>
> -#include <asm/timex.h>
>
> extern unsigned long __stack_chk_guard;
>
> @@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard;
> static __always_inline void boot_init_stack_canary(void)
> {
> unsigned long canary;
> - unsigned long tsc;
>
> /* Try to get a semi random initial value. */
> get_random_bytes(&canary, sizeof(canary));
> - tsc = get_cycles();
> - canary += tsc + (tsc << BITS_PER_LONG/2);
> canary ^= LINUX_VERSION_CODE;
> canary &= CANARY_MASK;
>
> diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
> index 7f659dda0032..ab104905d4db 100644
> --- a/arch/riscv/include/asm/timex.h
> +++ b/arch/riscv/include/asm/timex.h
> @@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void)
> #define get_cycles_hi get_cycles_hi
> #endif /* CONFIG_64BIT */
>
> +/*
> + * Much like MIPS, we may not have a viable counter to use at an early point
> + * in the boot process. Unfortunately we don't have a fallback, so instead
> + * we just return 0.
> + */
> +static inline unsigned long random_get_entropy(void)
> +{
> + if (unlikely(clint_time_val == NULL))
> + return 0;
> + return get_cycles();
> +}
> +#define random_get_entropy() random_get_entropy()
> +
> #else /* CONFIG_RISCV_M_MODE */
>
> static inline cycles_t get_cycles(void)
Did not reply to the patch... So again for Kendryte:
Tested-by: Damien Le Moal <[email protected]>
--
Damien Le Moal
Western Digital
On Sat, 26 Sep 2020 22:52:19 PDT (-0700), Damien Le Moal wrote:
> On Sun, 2020-09-27 at 11:09 +0530, Anup Patel wrote:
>> The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6
>> because clint_time_val is used even before CLINT driver is probed
>> at following places:
>> 1. rand_initialize() calls get_cycles() which in-turn uses
>> clint_time_val
>> 2. boot_init_stack_canary() calls get_cycles() which in-turn
>> uses clint_time_val
>>
>> The issue#1 (above) is fixed by providing custom random_get_entropy()
>> for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of
>> boot_init_stack_canary() on get_cycles() and this is aligned with the
>> boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel.
>>
>> Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation
>> for M-mode systems")
>> Signed-off-by: Palmer Dabbelt <[email protected]>
>> Signed-off-by: Anup Patel <[email protected]>
>> ---
>> Changes since v2:
>> - Take different approach and provide custom random_get_entropy() for
>> RISC-V NoMMU kernel
>> - Remove dependency of boot_init_stack_canary() on get_cycles()
>> - Hopefully we don't require to set clint_time_val = NULL in CLINT
>> driver with a different approach to fix.
>> Changes since v1:
>> - Explicitly initialize clint_time_val to NULL in CLINT driver to
>> avoid hang on Kendryte K210
>> ---
>> arch/riscv/include/asm/stackprotector.h | 4 ----
>> arch/riscv/include/asm/timex.h | 13 +++++++++++++
>> 2 files changed, 13 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
>> index d95f7b2a7f37..5962f8891f06 100644
>> --- a/arch/riscv/include/asm/stackprotector.h
>> +++ b/arch/riscv/include/asm/stackprotector.h
>> @@ -5,7 +5,6 @@
>>
>> #include <linux/random.h>
>> #include <linux/version.h>
>> -#include <asm/timex.h>
>>
>> extern unsigned long __stack_chk_guard;
>>
>> @@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard;
>> static __always_inline void boot_init_stack_canary(void)
>> {
>> unsigned long canary;
>> - unsigned long tsc;
>>
>> /* Try to get a semi random initial value. */
>> get_random_bytes(&canary, sizeof(canary));
>> - tsc = get_cycles();
>> - canary += tsc + (tsc << BITS_PER_LONG/2);
>> canary ^= LINUX_VERSION_CODE;
>> canary &= CANARY_MASK;
>>
>> diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
>> index 7f659dda0032..ab104905d4db 100644
>> --- a/arch/riscv/include/asm/timex.h
>> +++ b/arch/riscv/include/asm/timex.h
>> @@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void)
>> #define get_cycles_hi get_cycles_hi
>> #endif /* CONFIG_64BIT */
>>
>> +/*
>> + * Much like MIPS, we may not have a viable counter to use at an early point
>> + * in the boot process. Unfortunately we don't have a fallback, so instead
>> + * we just return 0.
>> + */
>> +static inline unsigned long random_get_entropy(void)
>> +{
>> + if (unlikely(clint_time_val == NULL))
>> + return 0;
>> + return get_cycles();
>> +}
>> +#define random_get_entropy() random_get_entropy()
>> +
>> #else /* CONFIG_RISCV_M_MODE */
>>
>> static inline cycles_t get_cycles(void)
>
> Did not reply to the patch... So again for Kendryte:
>
> Tested-by: Damien Le Moal <[email protected]>
Thanks. I've put this on for-next, right after the patch I just posted to add
EXPORT_SYMBOL(clint_time_val) as it turns out random_get_entropy() is used by a
driver.