2023-12-05 14:21:21

by Alexey Dobriyan

[permalink] [raw]
Subject: [PATCH] x86_64: test xmm/ymm register state after execve(2)

Test that xmm/ymm registers are cleared immediately after execve(2).

It is opportunistically named "check_xmm_ymm_zmm" because I don't have an
AVX-512 machine, but it will be trivial to extend without renaming stuff.

Signed-off-by: Alexey Dobriyan <[email protected]>
---

I want to draw attention to executable sizes:

-rwxr-xr-x. 1 ad ad 723920 Dec 5 12:28 check_initial_reg_state_32
-rwxr-xr-x. 1 ad ad 825904 Dec 5 12:28 check_initial_reg_state_64
-rw-r--r--. 1 ad ad 1901 Dec 5 12:18 check_initial_reg_state.c
-rwxr-xr-x. 1 ad ad 827976 Dec 5 17:09 check_xmm_ymm_zmm_64
-rw-r--r--. 1 ad ad 4705 Dec 5 17:09 check_xmm_ymm_zmm.c

which are absolutely ridiculous!

These tests should fit into 1 page, both!

tools/testing/selftests/x86/Makefile | 2
tools/testing/selftests/x86/check_xmm_ymm_zmm.c | 176 ++++++++++++++++++++++++
2 files changed, 178 insertions(+)

--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -20,6 +20,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
corrupt_xstate_header amx lam test_shadow_stack
+TARGETS_C_64BIT_ONLY += check_xmm_ymm_zmm
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall

@@ -110,6 +111,7 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S
# state.
$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_xmm_ymm_zmm_64: CFLAGS += -Wl,-ee_entry -static

$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
new file mode 100644
--- /dev/null
+++ b/tools/testing/selftests/x86/check_xmm_ymm_zmm.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2023 Alexey Dobriyan <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that xmm, ymm registers are cleared immediately after execve(2). */
+
+#include <stdio.h>
+#include <string.h>
+
+#if defined __amd64__
+#elif defined __i386__
+#error "fix register count, SSE2 detection"
+#else
+#error
+#endif
+
+#define array_size(a) (sizeof(a) / sizeof(a[0]))
+
+typedef char xmm_t[16];
+static const xmm_t xmm_z;
+static xmm_t xmm[16];
+
+typedef char ymm_t[32];
+static const ymm_t ymm_z;
+static ymm_t ymm[16];
+
+enum {
+ TEST_XMM = 1,
+ TEST_YMM = 2,
+};
+static volatile char g_test;
+
+/*
+ * Homework: write and install #UD handler in assembly and just start
+ * executing AVX-512/AVX2/SSE2 instructions falling back SIMD ladder
+ * if necessary.
+ *
+ * jk, use cpuid instead like any normal programmer would do.
+ */
+asm (
+".pushsection .text;"
+".type e_entry,@function;"
+".global e_entry;"
+"e_entry:"
+ /* test AVX2 support */
+ "mov $7, %eax;"
+ "xor %ecx, %ecx;"
+ "cpuid;"
+ "bt $5, %ebx;"
+ "jnc .Ltest_xmm;"
+
+ "vmovdqu %ymm0, ymm + 32 * 0;"
+ "vmovdqu %ymm1, ymm + 32 * 1;"
+ "vmovdqu %ymm2, ymm + 32 * 2;"
+ "vmovdqu %ymm3, ymm + 32 * 3;"
+ "vmovdqu %ymm4, ymm + 32 * 4;"
+ "vmovdqu %ymm5, ymm + 32 * 5;"
+ "vmovdqu %ymm6, ymm + 32 * 6;"
+ "vmovdqu %ymm7, ymm + 32 * 7;"
+ "vmovdqu %ymm8, ymm + 32 * 8;"
+ "vmovdqu %ymm9, ymm + 32 * 9;"
+ "vmovdqu %ymm10, ymm + 32 * 10;"
+ "vmovdqu %ymm11, ymm + 32 * 11;"
+ "vmovdqu %ymm12, ymm + 32 * 12;"
+ "vmovdqu %ymm13, ymm + 32 * 13;"
+ "vmovdqu %ymm14, ymm + 32 * 14;"
+ "vmovdqu %ymm15, ymm + 32 * 15;"
+ "movb $2, g_test;" /* TEST_YMM */
+ "jmp .Llibc_start;"
+
+".Ltest_xmm:"
+ "movdqu %xmm0, xmm + 16 * 0;"
+ "movdqu %xmm1, xmm + 16 * 1;"
+ "movdqu %xmm2, xmm + 16 * 2;"
+ "movdqu %xmm3, xmm + 16 * 3;"
+ "movdqu %xmm4, xmm + 16 * 4;"
+ "movdqu %xmm5, xmm + 16 * 5;"
+ "movdqu %xmm6, xmm + 16 * 6;"
+ "movdqu %xmm7, xmm + 16 * 7;"
+ "movdqu %xmm8, xmm + 16 * 8;"
+ "movdqu %xmm9, xmm + 16 * 9;"
+ "movdqu %xmm10, xmm + 16 * 10;"
+ "movdqu %xmm11, xmm + 16 * 11;"
+ "movdqu %xmm12, xmm + 16 * 12;"
+ "movdqu %xmm13, xmm + 16 * 13;"
+ "movdqu %xmm14, xmm + 16 * 14;"
+ "movdqu %xmm15, xmm + 16 * 15;"
+ "movb $1, g_test;" /* TEST_XMM */
+ "jmp .Llibc_start;"
+
+".Llibc_start:"
+ /*
+ * Undo cpuid, this is important for clean exit:
+ *
+ * Program received signal SIGSEGV, Segmentation fault.
+ * 0x0000000000000010 in ?? ()
+ * (gdb) bt
+ * #0 0x0000000000000010 in ?? ()
+ * #1 0x00000000004090c9 in __run_exit_handlers ()
+ * #2 0x0000000000409220 in exit ()
+ * #3 0x0000000000401da1 in __libc_start_call_main ()
+ * #4 0x00000000004034f0 in __libc_start_main_impl ()
+ * #5 0x0000000000401555 in _start ()
+ */
+ "xor %eax, %eax;"
+ "xor %ecx, %ecx;"
+ "xor %edx, %edx;"
+ "xor %ebx, %ebx;"
+ "jmp _start;"
+
+".size e_entry, .-e_entry;"
+".popsection;"
+);
+
+#define F stderr
+
+static void print_xymm(FILE *f, const char *pfx, const void *r, int len)
+{
+ fputs(pfx, f);
+ for (int i = 0; i < len; i += 1) {
+ fprintf(f, "%02hhx", *(unsigned char *)(r + i));
+ }
+ fputc('\n', f);
+}
+
+int main(void)
+{
+ int rv = 0;
+ if (g_test == TEST_YMM) {
+ for (int i = 0; i < array_size(ymm); i += 1) {
+ rv |= memcmp(&ymm[i], &ymm_z, sizeof(ymm_t)) != 0;
+ }
+ if (rv) {
+ fprintf(F, "FAIL\tymm\n");
+ for (int i = 0; i < array_size(ymm); i += 1) {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "ymm%-2d = ", i);
+ print_xymm(F, buf, &ymm[i], sizeof(ymm_t));
+ }
+ return 1;
+ } else {
+ fprintf(F, "PASS\tymm0 .. ymm%zu == 0\n", array_size(ymm) - 1);
+ return 0;
+ }
+ } else if (g_test == TEST_XMM) {
+ for (int i = 0; i < array_size(xmm); i += 1) {
+ rv |= memcmp(&xmm[i], &xmm_z, sizeof(xmm_t)) != 0;
+ }
+ if (rv) {
+ fprintf(F, "FAIL\txmm\n");
+ for (int i = 0; i < array_size(xmm); i += 1) {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "xmm%-2d = ", i);
+ print_xymm(F, buf, &xmm[i], sizeof(xmm_t));
+ }
+ return 1;
+ } else {
+ fprintf(F, "PASS\txmm0 .. xmm%zu == 0\n", array_size(xmm) - 1);
+ return 0;
+ }
+ } else {
+ fprintf(F, "FAIL\tg_test %d\n", +g_test);
+ return 1;
+ }
+}


2023-12-05 20:40:44

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH] x86_64: test xmm/ymm register state after execve(2)

On 12/5/23 06:21, Alexey Dobriyan wrote:
> Test that xmm/ymm registers are cleared immediately after execve(2).
>
> It is opportunistically named "check_xmm_ymm_zmm" because I don't have
> AVX-512 machine but it will be trivial to extend without renaming stuff.

Hi Alexey,

This looks pretty useful. I know we've had bugs in this area in the
past. Was there any recent motivation for this, though? Just curious.

> --- /dev/null
> +++ b/tools/testing/selftests/x86/check_xmm_ymm_zmm.c
> @@ -0,0 +1,176 @@
> +/*
> + * Copyright (c) 2023 Alexey Dobriyan <[email protected]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +/* Test that xmm, ymm registers are cleared immediately after execve(2). */

Can this be trimmed down to a nice SPDX header?

> +#include <stdio.h>
> +#include <string.h>
> +
> +#if defined __amd64__
> +#elif defined __i386__
> +#error "fix register count, SSE2 detection"
> +#else
> +#error
> +#endif
> +
> +#define array_size(a) (sizeof(a) / sizeof(a[0]))
> +
> +typedef char xmm_t[16];
> +static const xmm_t xmm_z;
> +static xmm_t xmm[16];
> +
> +typedef char ymm_t[32];
> +static const ymm_t ymm_z;
> +static ymm_t ymm[16];
> +
> +enum {
> + TEST_XMM = 1,
> + TEST_YMM = 2,
> +};
> +static volatile char g_test;
> +
> +/*
> + * Homework: write and install #UD handler in assembly and just start
> + * executing AVX-512/AVX2/SSE2 instructions falling back SIMD ladder
> + * if necessary.
> + *
> + * jk, use cpuid instead like any normal programmer would do.
> + */
> +asm (
> +".pushsection .text;"
> +".type e_entry,@function;"
> +".global e_entry;"
> +"e_entry:"
> + /* test AVX2 support */
> + "mov $7, %eax;"
> + "xor %ecx, %ecx;"
> + "cpuid;"
> + "bt $5, %ebx;"
> + "jnc .Ltest_xmm;"
> +
> + "vmovdqu %ymm0, ymm + 32 * 0;"
> + "vmovdqu %ymm1, ymm + 32 * 1;
...
>
> +".Ltest_xmm:"
> + "movdqu %xmm0, xmm + 16 * 0;"
> + "movdqu %xmm1, xmm + 16 * 1;"

Does this work on systems without XMMs? I know it's not common these
days but it's possible, especially in VMs.

2023-12-06 06:16:45

by Alexey Dobriyan

[permalink] [raw]
Subject: Re: [PATCH] x86_64: test xmm/ymm register state after execve(2)

On Tue, Dec 05, 2023 at 12:39:32PM -0800, Dave Hansen wrote:
> On 12/5/23 06:21, Alexey Dobriyan wrote:
> > Test that xmm/ymm registers are cleared immediately after execve(2).
> >
> > It is opportunistically named "check_xmm_ymm_zmm" because I don't have
> > AVX-512 machine but it will be trivial to extend without renaming stuff.
>
> Hi Alexey,
>
> This looks pretty useful. I know we've had bugs in this area in the
> past. Was there any recent motivation for this, though? Just curious.

I found check_initial_reg_state.c with cool assembly but only general
registers being checked.

> > --- /dev/null
> > +++ b/tools/testing/selftests/x86/check_xmm_ymm_zmm.c
> > @@ -0,0 +1,176 @@
> > +/*
> > + * Copyright (c) 2023 Alexey Dobriyan <[email protected]>
> > + *
> > + * Permission to use, copy, modify, and distribute this software for any
> > + * purpose with or without fee is hereby granted, provided that the above
> > + * copyright notice and this permission notice appear in all copies.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > + */
> > +/* Test that xmm, ymm registers are cleared immediately after execve(2). */
>
> Can this be trimmed down to a nice SPDX header?

I'd rather not.

This is a standalone test program, not part of the kernel proper, so if it's
copied somewhere then the copyright notice is retained. Another project might
not even use the SPDX scheme.

> > +#include <stdio.h>
> > +#include <string.h>
> > +
> > +#if defined __amd64__
> > +#elif defined __i386__
> > +#error "fix register count, SSE2 detection"
> > +#else
> > +#error
> > +#endif

> > +".Ltest_xmm:"
> > + "movdqu %xmm0, xmm + 16 * 0;"
> > + "movdqu %xmm1, xmm + 16 * 1;"
>
> Does this work on systems without XMMs? I know it's not common these
> days but it's possible, especially in VMs.

No. But SSE2 is a mandatory part of the x86_64 architecture, so all x86_64
systems have it. On i386 SSE2 is optional, so detecting it is up to whoever
will port this test to i386.

2023-12-11 18:58:36

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH] x86_64: test xmm/ymm register state after execve(2)

On 12/5/23 22:15, Alexey Dobriyan wrote:
>>> +".Ltest_xmm:"
>>> + "movdqu %xmm0, xmm + 16 * 0;"
>>> + "movdqu %xmm1, xmm + 16 * 1;"
>> Does this work on systems without XMMs? I know it's not common these
>> days but it's possible, especially in VMs.
> No. But I think all x86_64 systems have SSE2. So it is up to whoever will
> port this test to i386.
I somehow got it in my head that we can't do XMMs unless XSAVE is in
the picture. But that's wrong. FXSAVE works on XMM registers just fine,
FXSAVE is a mandatory part of FPU support on x86_64, and 64-bit requires
the FPU.

It would be nice to spell that out explicitly in the changelog if you
get a chance to resend this.

Also, on the license... You as the copyright holder are, of course, free
to distribute it under any license you want yourself. But for the copy
in the kernel, it would be nice if you chose something you could stick
an SPDX label on. That way, a maintainer knows immediately if the
license is OK or not.