2024-02-05 15:49:32

by Heiko Carstens

[permalink] [raw]
Subject: [PATCH 2/2] s390/fpu: make use of __uninitialized macro

Code sections in s390 specific kernel code which use floating point or
vector registers all come with a 520 byte stack variable to save already in
use registers, if required.

With INIT_STACK_ALL_PATTERN or INIT_STACK_ALL_ZERO enabled this variable
will always be initialized on function entry in addition to saving register
contents, which contradicts the intend (performance improvement) of such
code sections.

Therefore provide a DECLARE_KERNEL_FPU_ONSTACK() macro which provides
struct kernel_fpu variables with an __uninitialized attribute, and convert
all existing code to use this.

This way only this specific type of stack variable will not be initialized,
regardless of config options.

Signed-off-by: Heiko Carstens <[email protected]>
---
arch/s390/crypto/chacha-glue.c | 2 +-
arch/s390/crypto/crc32-vx.c | 2 +-
arch/s390/include/asm/fpu/types.h | 3 +++
arch/s390/kernel/sysinfo.c | 2 +-
lib/raid6/s390vx.uc | 4 ++--
5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index ed9959e6f714..a823132fc563 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
unsigned int nbytes, const u32 *key,
u32 *counter)
{
- struct kernel_fpu vxstate;
+ DECLARE_KERNEL_FPU_ONSTACK(vxstate);

kernel_fpu_begin(&vxstate, KERNEL_VXR);
chacha20_vx(dst, src, nbytes, key, counter);
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 017143e9cef7..6ae3e3ff5b0a 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -49,8 +49,8 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
static u32 __pure ___fname(u32 crc, \
unsigned char const *data, size_t datalen) \
{ \
- struct kernel_fpu vxstate; \
unsigned long prealign, aligned, remaining; \
+ DECLARE_KERNEL_FPU_ONSTACK(vxstate); \
\
if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
return ___crc32_sw(crc, data, datalen); \
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
index d889e9436865..b1afa13c07b7 100644
--- a/arch/s390/include/asm/fpu/types.h
+++ b/arch/s390/include/asm/fpu/types.h
@@ -35,4 +35,7 @@ struct kernel_fpu {
};
};

+#define DECLARE_KERNEL_FPU_ONSTACK(name) \
+ struct kernel_fpu name __uninitialized
+
#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index f6f8f498c9be..b439f17516eb 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -426,9 +426,9 @@ subsys_initcall(create_proc_service_level);
*/
void s390_adjust_jiffies(void)
{
+ DECLARE_KERNEL_FPU_ONSTACK(fpu);
struct sysinfo_1_2_2 *info;
unsigned long capability;
- struct kernel_fpu fpu;

info = (void *) get_zeroed_page(GFP_KERNEL);
if (!info)
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index cd0b9e95f499..7b0b355e1a26 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -81,7 +81,7 @@ static inline void COPY_VEC(int x, int y)

static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
- struct kernel_fpu vxstate;
+ DECLARE_KERNEL_FPU_ONSTACK(vxstate);
u8 **dptr, *p, *q;
int d, z, z0;

@@ -114,7 +114,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
{
- struct kernel_fpu vxstate;
+ DECLARE_KERNEL_FPU_ONSTACK(vxstate);
u8 **dptr, *p, *q;
int d, z, z0;

--
2.40.1



2024-02-05 16:36:48

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH 2/2] s390/fpu: make use of __uninitialized macro

On Mon, Feb 05, 2024 at 08:26:32AM -0800, Kees Cook wrote:
> > +#define DECLARE_KERNEL_FPU_ONSTACK(name) \
> > + struct kernel_fpu name __uninitialized
>
> Are there cases when struct kernel_fpu should be initialized? e.g.
> should the attribute just be added to the struct definition instead of
> marking each use?

I tried that, but failed:

/arch/s390/include/asm/fpu/types.h:36:3: warning: '__uninitialized__' attribute only applies to local variables [-Wignored-attributes]
36 | } __uninitialized;
| ^
/include/linux/compiler_attributes.h:343:42: note: expanded from macro '__uninitialized'
343 | # define __uninitialized __attribute__((__uninitialized__))
| ^

That's why I came up with this macro. I'd prefer to have this added only to
the struct definition, but looks like this is not possible (or I just can't
figure out who to do that).

2024-02-05 16:42:49

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 2/2] s390/fpu: make use of __uninitialized macro

On Mon, Feb 05, 2024 at 04:48:44PM +0100, Heiko Carstens wrote:
> Code sections in s390 specific kernel code which use floating point or
> vector registers all come with a 520 byte stack variable to save already in
> use registers, if required.
>
> With INIT_STACK_ALL_PATTERN or INIT_STACK_ALL_ZERO enabled this variable
> will always be initialized on function entry in addition to saving register
> contents, which contradicts the intend (performance improvement) of such
> code sections.
>
> Therefore provide a DECLARE_KERNEL_FPU_ONSTACK() macro which provides
> struct kernel_fpu variables with an __uninitialized attribute, and convert
> all existing code to use this.
>
> This way only this specific type of stack variable will not be initialized,
> regardless of config options.
>
> Signed-off-by: Heiko Carstens <[email protected]>
> ---
> arch/s390/crypto/chacha-glue.c | 2 +-
> arch/s390/crypto/crc32-vx.c | 2 +-
> arch/s390/include/asm/fpu/types.h | 3 +++
> arch/s390/kernel/sysinfo.c | 2 +-
> lib/raid6/s390vx.uc | 4 ++--
> 5 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
> index ed9959e6f714..a823132fc563 100644
> --- a/arch/s390/crypto/chacha-glue.c
> +++ b/arch/s390/crypto/chacha-glue.c
> @@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
> unsigned int nbytes, const u32 *key,
> u32 *counter)
> {
> - struct kernel_fpu vxstate;
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate);
>
> kernel_fpu_begin(&vxstate, KERNEL_VXR);
> chacha20_vx(dst, src, nbytes, key, counter);
> diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
> index 017143e9cef7..6ae3e3ff5b0a 100644
> --- a/arch/s390/crypto/crc32-vx.c
> +++ b/arch/s390/crypto/crc32-vx.c
> @@ -49,8 +49,8 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
> static u32 __pure ___fname(u32 crc, \
> unsigned char const *data, size_t datalen) \
> { \
> - struct kernel_fpu vxstate; \
> unsigned long prealign, aligned, remaining; \
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate); \
> \
> if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
> return ___crc32_sw(crc, data, datalen); \
> diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
> index d889e9436865..b1afa13c07b7 100644
> --- a/arch/s390/include/asm/fpu/types.h
> +++ b/arch/s390/include/asm/fpu/types.h
> @@ -35,4 +35,7 @@ struct kernel_fpu {
> };
> };
>
> +#define DECLARE_KERNEL_FPU_ONSTACK(name) \
> + struct kernel_fpu name __uninitialized

Are there cases when struct kernel_fpu should be initialized? e.g.
should the attribute just be added to the struct definition instead of
marking each use?

-Kees

> +
> #endif /* _ASM_S390_FPU_TYPES_H */
> diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
> index f6f8f498c9be..b439f17516eb 100644
> --- a/arch/s390/kernel/sysinfo.c
> +++ b/arch/s390/kernel/sysinfo.c
> @@ -426,9 +426,9 @@ subsys_initcall(create_proc_service_level);
> */
> void s390_adjust_jiffies(void)
> {
> + DECLARE_KERNEL_FPU_ONSTACK(fpu);
> struct sysinfo_1_2_2 *info;
> unsigned long capability;
> - struct kernel_fpu fpu;
>
> info = (void *) get_zeroed_page(GFP_KERNEL);
> if (!info)
> diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
> index cd0b9e95f499..7b0b355e1a26 100644
> --- a/lib/raid6/s390vx.uc
> +++ b/lib/raid6/s390vx.uc
> @@ -81,7 +81,7 @@ static inline void COPY_VEC(int x, int y)
>
> static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
> {
> - struct kernel_fpu vxstate;
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate);
> u8 **dptr, *p, *q;
> int d, z, z0;
>
> @@ -114,7 +114,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
> static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> {
> - struct kernel_fpu vxstate;
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate);
> u8 **dptr, *p, *q;
> int d, z, z0;
>
> --
> 2.40.1
>

--
Kees Cook

2024-02-05 17:00:38

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 2/2] s390/fpu: make use of __uninitialized macro

On Mon, Feb 05, 2024 at 05:35:29PM +0100, Heiko Carstens wrote:
> On Mon, Feb 05, 2024 at 08:26:32AM -0800, Kees Cook wrote:
> > > +#define DECLARE_KERNEL_FPU_ONSTACK(name) \
> > > + struct kernel_fpu name __uninitialized
> >
> > Are there cases when struct kernel_fpu should be initialized? e.g.
> > should the attribute just be added to the struct definition instead of
> > marking each use?
>
> I tried that, but failed:
>
> ./arch/s390/include/asm/fpu/types.h:36:3: warning: '__uninitialized__' attribute only applies to local variables [-Wignored-attributes]
> 36 | } __uninitialized;
> | ^

Oh. That's extremely disappointing. I think we may want to consider
opening bug reports with GCC and Clang for this. Not that it'll help us
now, since it needs to actually work today. Bummer!

In that case:

Reviewed-by: Kees Cook <[email protected]>

--
Kees Cook

2024-02-06 01:31:32

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH 2/2] s390/fpu: make use of __uninitialized macro

On Mon, Feb 05, 2024 at 04:48:44PM +0100, Heiko Carstens wrote:
> Code sections in s390 specific kernel code which use floating point or
> vector registers all come with a 520 byte stack variable to save already in
> use registers, if required.
>
> With INIT_STACK_ALL_PATTERN or INIT_STACK_ALL_ZERO enabled this variable
> will always be initialized on function entry in addition to saving register
> contents, which contradicts the intend (performance improvement) of such
> code sections.
>
> Therefore provide a DECLARE_KERNEL_FPU_ONSTACK() macro which provides
> struct kernel_fpu variables with an __uninitialized attribute, and convert
> all existing code to use this.
>
> This way only this specific type of stack variable will not be initialized,
> regardless of config options.
>
> Signed-off-by: Heiko Carstens <[email protected]>

Reviewed-by: Nathan Chancellor <[email protected]>

> ---
> arch/s390/crypto/chacha-glue.c | 2 +-
> arch/s390/crypto/crc32-vx.c | 2 +-
> arch/s390/include/asm/fpu/types.h | 3 +++
> arch/s390/kernel/sysinfo.c | 2 +-
> lib/raid6/s390vx.uc | 4 ++--
> 5 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
> index ed9959e6f714..a823132fc563 100644
> --- a/arch/s390/crypto/chacha-glue.c
> +++ b/arch/s390/crypto/chacha-glue.c
> @@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
> unsigned int nbytes, const u32 *key,
> u32 *counter)
> {
> - struct kernel_fpu vxstate;
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate);
>
> kernel_fpu_begin(&vxstate, KERNEL_VXR);
> chacha20_vx(dst, src, nbytes, key, counter);
> diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
> index 017143e9cef7..6ae3e3ff5b0a 100644
> --- a/arch/s390/crypto/crc32-vx.c
> +++ b/arch/s390/crypto/crc32-vx.c
> @@ -49,8 +49,8 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
> static u32 __pure ___fname(u32 crc, \
> unsigned char const *data, size_t datalen) \
> { \
> - struct kernel_fpu vxstate; \
> unsigned long prealign, aligned, remaining; \
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate); \
> \
> if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
> return ___crc32_sw(crc, data, datalen); \
> diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
> index d889e9436865..b1afa13c07b7 100644
> --- a/arch/s390/include/asm/fpu/types.h
> +++ b/arch/s390/include/asm/fpu/types.h
> @@ -35,4 +35,7 @@ struct kernel_fpu {
> };
> };
>
> +#define DECLARE_KERNEL_FPU_ONSTACK(name) \
> + struct kernel_fpu name __uninitialized
> +
> #endif /* _ASM_S390_FPU_TYPES_H */
> diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
> index f6f8f498c9be..b439f17516eb 100644
> --- a/arch/s390/kernel/sysinfo.c
> +++ b/arch/s390/kernel/sysinfo.c
> @@ -426,9 +426,9 @@ subsys_initcall(create_proc_service_level);
> */
> void s390_adjust_jiffies(void)
> {
> + DECLARE_KERNEL_FPU_ONSTACK(fpu);
> struct sysinfo_1_2_2 *info;
> unsigned long capability;
> - struct kernel_fpu fpu;
>
> info = (void *) get_zeroed_page(GFP_KERNEL);
> if (!info)
> diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
> index cd0b9e95f499..7b0b355e1a26 100644
> --- a/lib/raid6/s390vx.uc
> +++ b/lib/raid6/s390vx.uc
> @@ -81,7 +81,7 @@ static inline void COPY_VEC(int x, int y)
>
> static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
> {
> - struct kernel_fpu vxstate;
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate);
> u8 **dptr, *p, *q;
> int d, z, z0;
>
> @@ -114,7 +114,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
> static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> {
> - struct kernel_fpu vxstate;
> + DECLARE_KERNEL_FPU_ONSTACK(vxstate);
> u8 **dptr, *p, *q;
> int d, z, z0;
>
> --
> 2.40.1
>