2024-05-07 09:55:26

by Balasubrmanian, Vignesh

[permalink] [raw]
Subject: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

Add a new .note section containing type, size, offset and flags of
every xfeature that is present.

This information will be used by the debuggers to understand the XSAVE
layout of the machine where the core file is dumped, and to read XSAVE
registers, especially during cross-platform debugging.

Some background:

The XSAVE layouts of modern AMD and Intel CPUs differ, especially since
Memory Protection Keys and the AVX-512 features have been inculcated into
the AMD CPUs.
This is since AMD never adopted (and hence never left room in the XSAVE
layout for) the Intel MPX feature. Tools like GDB had assumed a fixed XSAVE
layout matching that of Intel (based on the XCR0 mask).
Hence, the core dumps from AMD CPUs didn't match the known size for the
XCR0 mask. This resulted in GDB and other tools not being able to access
the values of the AVX-512 and PKRU registers on AMD CPUs.
To solve this, an interim solution has been accepted into GDB, and is
already a part of GDB 14, thanks to these series of patches
[ https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html ].
But this patch series depends on heuristics based on the total XSAVE
register set size and the XCR0 mask to infer the layouts of the various
register blocks for core dumps, and hence, is not a foolproof mechanism to
determine the layout of the XSAVE area.

Hence this new core dump note has been proposed as a more sturdy mechanism
to allow GDB/LLDB and other relevant tools to determine the layout of the
XSAVE area of the machine where the corefile was dumped.
The new core dump note (which is being proposed as a per-process .note
section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures.
Each structure describes an individual extended feature containing offset,
size and flags (that is obtained through CPUID instruction) in a format
roughly matching the follow C structure:

struct xfeat_component {
u32 xfeat_type;
u32 xfeat_sz;
u32 xfeat_off;
u32 xfeat_flags;
};

Co-developed-by: Jini Susan George <[email protected]>
Signed-off-by: Jini Susan George <[email protected]>
Signed-off-by: Vignesh Balasubramanian <[email protected]>
---
v1->v2: Removed kernel internal defn dependency, code improvements

arch/x86/Kconfig | 1 +
arch/x86/include/asm/elf.h | 34 +++++++++
arch/x86/kernel/fpu/xstate.c | 141 +++++++++++++++++++++++++++++++++++
fs/binfmt_elf.c | 4 +-
include/uapi/linux/elf.h | 1 +
5 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 928820e61cb5..cc67daab3396 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -105,6 +105,7 @@ config X86
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAVE_EXTRA_ELF_NOTES
select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1fb83d47711f..5952574db64b 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -13,6 +13,40 @@
#include <asm/auxvec.h>
#include <asm/fsgsbase.h>

+struct xfeat_component {
+ u32 xfeat_type;
+ u32 xfeat_sz;
+ u32 xfeat_off;
+ u32 xfeat_flags;
+} __packed;
+
+_Static_assert(sizeof(struct xfeat_component)%4 == 0, "xfeat_component is not aligned");
+
+enum custom_feature {
+ FEATURE_XSAVE_FP = 0,
+ FEATURE_XSAVE_SSE = 1,
+ FEATURE_XSAVE_YMM = 2,
+ FEATURE_XSAVE_BNDREGS = 3,
+ FEATURE_XSAVE_BNDCSR = 4,
+ FEATURE_XSAVE_OPMASK = 5,
+ FEATURE_XSAVE_ZMM_Hi256 = 6,
+ FEATURE_XSAVE_Hi16_ZMM = 7,
+ FEATURE_XSAVE_PT = 8,
+ FEATURE_XSAVE_PKRU = 9,
+ FEATURE_XSAVE_PASID = 10,
+ FEATURE_XSAVE_CET_USER = 11,
+ FEATURE_XSAVE_CET_SHADOW_STACK = 12,
+ FEATURE_XSAVE_HDC = 13,
+ FEATURE_XSAVE_UINTR = 14,
+ FEATURE_XSAVE_LBR = 15,
+ FEATURE_XSAVE_HWP = 16,
+ FEATURE_XSAVE_XTILE_CFG = 17,
+ FEATURE_XSAVE_XTILE_DATA = 18,
+ FEATURE_MAX,
+ FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,
+ FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,
+};
+
typedef unsigned long elf_greg_t;

#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 33a214b1a4ce..3d1c3c96e34d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -13,6 +13,7 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
+#include <linux/coredump.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
@@ -87,6 +88,8 @@ static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
#define XSTATE_FLAG_SUPERVISOR BIT(0)
#define XSTATE_FLAG_ALIGNED64 BIT(1)

+static const char owner_name[] = "LINUX";
+
/*
* Return whether the system supports a given xfeature.
*
@@ -1837,3 +1840,141 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_COREDUMP
+static int get_sub_leaf(int custom_xfeat)
+{
+ switch (custom_xfeat) {
+ case FEATURE_XSAVE_YMM: return XFEATURE_YMM;
+ case FEATURE_XSAVE_BNDREGS: return XFEATURE_BNDREGS;
+ case FEATURE_XSAVE_BNDCSR: return XFEATURE_BNDCSR;
+ case FEATURE_XSAVE_OPMASK: return XFEATURE_OPMASK;
+ case FEATURE_XSAVE_ZMM_Hi256: return XFEATURE_ZMM_Hi256;
+ case FEATURE_XSAVE_Hi16_ZMM: return XFEATURE_Hi16_ZMM;
+ case FEATURE_XSAVE_PT: return XFEATURE_PT_UNIMPLEMENTED_SO_FAR;
+ case FEATURE_XSAVE_PKRU: return XFEATURE_PKRU;
+ case FEATURE_XSAVE_PASID: return XFEATURE_PASID;
+ case FEATURE_XSAVE_CET_USER: return XFEATURE_CET_USER;
+ case FEATURE_XSAVE_CET_SHADOW_STACK: return XFEATURE_CET_KERNEL_UNUSED;
+ case FEATURE_XSAVE_HDC: return XFEATURE_RSRVD_COMP_13;
+ case FEATURE_XSAVE_UINTR: return XFEATURE_RSRVD_COMP_14;
+ case FEATURE_XSAVE_LBR: return XFEATURE_LBR;
+ case FEATURE_XSAVE_HWP: return XFEATURE_RSRVD_COMP_16;
+ case FEATURE_XSAVE_XTILE_CFG: return XFEATURE_XTILE_CFG;
+ case FEATURE_XSAVE_XTILE_DATA: return XFEATURE_XTILE_DATA;
+ default:
+ pr_warn_ratelimited("Not a valid XSAVE Feature.");
+ return 0;
+ }
+}
+
+/*
+ * Dump type, size, offset and flag values for every xfeature that is present.
+ */
+static int dump_xsave_layout_desc(struct coredump_params *cprm)
+{
+ u32 supported_features = 0;
+ struct xfeat_component xc;
+ u32 eax, ebx, ecx, edx;
+ int num_records = 0;
+ int sub_leaf = 0;
+ int i;
+
+ /* Find supported extended features */
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ supported_features = eax;
+
+ for (i = FEATURE_XSAVE_EXTENDED_START;
+ i <= FEATURE_XSAVE_EXTENDED_END; i++) {
+ sub_leaf = get_sub_leaf(i);
+ if (!sub_leaf)
+ continue;
+ if (supported_features & (1U << sub_leaf)) {
+ cpuid_count(XSTATE_CPUID, sub_leaf, &eax, &ebx, &ecx, &edx);
+ xc.xfeat_type = i;
+ xc.xfeat_sz = eax;
+ xc.xfeat_off = ebx;
+ /* Reserved for future use */
+ xc.xfeat_flags = 0;
+
+ if (!dump_emit(cprm, &xc,
+ sizeof(struct xfeat_component)))
+ return 0;
+ num_records++;
+ }
+ }
+
+ return num_records;
+}
+
+static int get_xsave_desc_size(void)
+{
+ int supported_features = 0;
+ int xfeatures_count = 0;
+ u32 eax, ebx, ecx, edx;
+ int sub_leaf = 0;
+ int i;
+
+ /* Find supported extended features */
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ supported_features = eax;
+
+ for (i = FEATURE_XSAVE_EXTENDED_START;
+ i <= FEATURE_XSAVE_EXTENDED_END; i++) {
+ sub_leaf = get_sub_leaf(i);
+ if (!sub_leaf)
+ continue;
+ if (supported_features & (1U << sub_leaf))
+ xfeatures_count++;
+ }
+
+ return xfeatures_count * (sizeof(struct xfeat_component));
+}
+
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+ int num_records = 0;
+ struct elf_note en;
+
+ en.n_namesz = sizeof(owner_name);
+ en.n_descsz = get_xsave_desc_size();
+ en.n_type = NT_X86_XSAVE_LAYOUT;
+
+ if (!dump_emit(cprm, &en, sizeof(en)))
+ return 1;
+ if (!dump_emit(cprm, owner_name, en.n_namesz))
+ return 1;
+ if (!dump_align(cprm, 4))
+ return 1;
+
+ num_records = dump_xsave_layout_desc(cprm);
+ if (!num_records) {
+ pr_warn_ratelimited("Error adding XSTATE layout ELF note. XSTATE buffer in the core file will be unparseable.");
+ return 1;
+ }
+
+ /* Total size should be equal to the number of records */
+ if ((sizeof(struct xfeat_component) * num_records) != en.n_descsz) {
+ pr_warn_ratelimited("Error adding XSTATE layout ELF note. The size of the .note section does not match with the total size of the records.");
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Return the size of new note.
+ */
+int elf_coredump_extra_notes_size(void)
+{
+ int size = 0;
+
+ /* NOTE Header */
+ size += sizeof(struct elf_note);
+ /* name + align */
+ size += roundup(sizeof(owner_name), 4);
+ size += get_xsave_desc_size();
+
+ return size;
+}
+#endif
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5397b552fbeb..833bcb7e957b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2000,7 +2000,7 @@ static int elf_core_dump(struct coredump_params *cprm)
{
size_t sz = info.size;

- /* For cell spufs */
+ /* For cell spufs and x86 xstate */
sz += elf_coredump_extra_notes_size();

phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
@@ -2064,7 +2064,7 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!write_note_info(&info, cprm))
goto end_coredump;

- /* For cell spufs */
+ /* For cell spufs and x86 xstate */
if (elf_coredump_extra_notes_write(cprm))
goto end_coredump;

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b54b313bcf07..e30a9b47dc87 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -411,6 +411,7 @@ typedef struct elf64_shdr {
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
/* Old binutils treats 0x203 as a CET state */
#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
#define NT_S390_TIMER 0x301 /* s390 timer register */
#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
--
2.34.1



2024-05-08 00:14:25

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

Hi Vignesh,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kees/for-next/execve]
[also build test WARNING on tip/x86/core kees/for-next/pstore kees/for-next/kspp linus/master v6.9-rc7 next-20240507]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Vignesh-Balasubramanian/x86-elf-Add-a-new-note-section-containing-Xfeatures-information-to-x86-core-files/20240507-175615
base: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
patch link: https://lore.kernel.org/r/20240507095330.2674-2-vigbalas%40amd.com
patch subject: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files
config: i386-randconfig-013-20240508 (https://download.01.org/0day-ci/archive/20240508/[email protected]/config)
compiler: clang version 18.1.4 (https://github.com/llvm/llvm-project e6c3289804a67ea0bb6a86fadbe454dd93b8d855)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240508/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

>> arch/x86/kernel/fpu/xstate.c:91:19: warning: unused variable 'owner_name' [-Wunused-const-variable]
91 | static const char owner_name[] = "LINUX";
| ^~~~~~~~~~
1 warning generated.

Kconfig warnings: (for reference only)
WARNING: unmet direct dependencies detected for DRM_I915_DEBUG_GEM
Depends on [n]: HAS_IOMEM [=y] && DRM_I915 [=m] && EXPERT [=y] && DRM_I915_WERROR [=n]
Selected by [m]:
- DRM_I915_DEBUG [=y] && HAS_IOMEM [=y] && DRM_I915 [=m] && EXPERT [=y] && !COMPILE_TEST [=n]


vim +/owner_name +91 arch/x86/kernel/fpu/xstate.c

90
> 91 static const char owner_name[] = "LINUX";
92

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-05-08 00:16:58

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

Hi Vignesh,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kees/for-next/execve]
[also build test WARNING on tip/x86/core kees/for-next/pstore kees/for-next/kspp linus/master v6.9-rc7 next-20240507]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Vignesh-Balasubramanian/x86-elf-Add-a-new-note-section-containing-Xfeatures-information-to-x86-core-files/20240507-175615
base: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
patch link: https://lore.kernel.org/r/20240507095330.2674-2-vigbalas%40amd.com
patch subject: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files
config: i386-buildonly-randconfig-006-20240508 (https://download.01.org/0day-ci/archive/20240508/[email protected]/config)
compiler: gcc-8 (Ubuntu 8.4.0-3ubuntu2) 8.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240508/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

>> arch/x86/kernel/fpu/xstate.c:91:19: warning: 'owner_name' defined but not used [-Wunused-const-variable=]
static const char owner_name[] = "LINUX";
^~~~~~~~~~
In file included from include/linux/string.h:369,
from arch/x86/include/asm/page_32.h:18,
from arch/x86/include/asm/page.h:14,
from arch/x86/include/asm/processor.h:20,
from arch/x86/include/asm/timex.h:5,
from include/linux/timex.h:67,
from include/linux/time32.h:13,
from include/linux/time.h:60,
from include/linux/compat.h:10,
from arch/x86/kernel/fpu/xstate.c:8:
In function 'fortify_memcpy_chk',
inlined from 'membuf_write.isra.6' at include/linux/regset.h:42:3,
inlined from '__copy_xstate_to_uabi_buf' at arch/x86/kernel/fpu/xstate.c:1049:2:
include/linux/fortify-string.h:562:4: warning: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()?
__read_overflow2_field(q_size_field, size);
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/owner_name +91 arch/x86/kernel/fpu/xstate.c

90
> 91 static const char owner_name[] = "LINUX";
92

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-05-08 08:11:54

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

On Tue, May 07, 2024 at 03:23:31PM +0530, Vignesh Balasubramanian wrote:
> Add a new .note section containing type, size, offset and flags of
> every xfeature that is present.
>
> This information will be used by the debuggers to understand the XSAVE
> layout of the machine where the core file is dumped, and to read XSAVE
> registers, especially during cross-platform debugging.
>
> Some background:
>
> The XSAVE layouts of modern AMD and Intel CPUs differ, especially since
> Memory Protection Keys and the AVX-512 features have been inculcated into
> the AMD CPUs.
> This is since AMD never adopted (and hence never left room in the XSAVE
> layout for) the Intel MPX feature. Tools like GDB had assumed a fixed XSAVE
> layout matching that of Intel (based on the XCR0 mask).
> Hence, the core dumps from AMD CPUs didn't match the known size for the
> XCR0 mask. This resulted in GDB and other tools not being able to access
> the values of the AVX-512 and PKRU registers on AMD CPUs.
> To solve this, an interim solution has been accepted into GDB, and is
> already a part of GDB 14, thanks to these series of patches
> [ https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html ].
> But this patch series depends on heuristics based on the total XSAVE
> register set size and the XCR0 mask to infer the layouts of the various
> register blocks for core dumps, and hence, is not a foolproof mechanism to
> determine the layout of the XSAVE area.
>
> Hence this new core dump note has been proposed as a more sturdy mechanism
> to allow GDB/LLDB and other relevant tools to determine the layout of the
> XSAVE area of the machine where the corefile was dumped.
> The new core dump note (which is being proposed as a per-process .note
> section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures.
> Each structure describes an individual extended feature containing offset,
> size and flags (that is obtained through CPUID instruction) in a format
> roughly matching the follow C structure:
>
> struct xfeat_component {
> u32 xfeat_type;
> u32 xfeat_sz;
> u32 xfeat_off;
> u32 xfeat_flags;
> };
>
> Co-developed-by: Jini Susan George <[email protected]>
> Signed-off-by: Jini Susan George <[email protected]>
> Signed-off-by: Vignesh Balasubramanian <[email protected]>
> ---
> v1->v2: Removed kernel internal defn dependency, code improvements
>
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/elf.h | 34 +++++++++
> arch/x86/kernel/fpu/xstate.c | 141 +++++++++++++++++++++++++++++++++++
> fs/binfmt_elf.c | 4 +-
> include/uapi/linux/elf.h | 1 +
> 5 files changed, 179 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 928820e61cb5..cc67daab3396 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -105,6 +105,7 @@ config X86
> select ARCH_HAS_DEBUG_WX
> select ARCH_HAS_ZONE_DMA_SET if EXPERT
> select ARCH_HAVE_NMI_SAFE_CMPXCHG
> + select ARCH_HAVE_EXTRA_ELF_NOTES
> select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
> select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
> select ARCH_MIGHT_HAVE_PC_PARPORT
> diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
> index 1fb83d47711f..5952574db64b 100644
> --- a/arch/x86/include/asm/elf.h
> +++ b/arch/x86/include/asm/elf.h
> @@ -13,6 +13,40 @@
> #include <asm/auxvec.h>
> #include <asm/fsgsbase.h>
>
> +struct xfeat_component {
> + u32 xfeat_type;
> + u32 xfeat_sz;
> + u32 xfeat_off;
> + u32 xfeat_flags;
> +} __packed;
> +
> +_Static_assert(sizeof(struct xfeat_component)%4 == 0, "xfeat_component is not aligned");
> +
> +enum custom_feature {
> + FEATURE_XSAVE_FP = 0,
> + FEATURE_XSAVE_SSE = 1,
> + FEATURE_XSAVE_YMM = 2,
> + FEATURE_XSAVE_BNDREGS = 3,
> + FEATURE_XSAVE_BNDCSR = 4,
> + FEATURE_XSAVE_OPMASK = 5,
> + FEATURE_XSAVE_ZMM_Hi256 = 6,
> + FEATURE_XSAVE_Hi16_ZMM = 7,
> + FEATURE_XSAVE_PT = 8,
> + FEATURE_XSAVE_PKRU = 9,
> + FEATURE_XSAVE_PASID = 10,
> + FEATURE_XSAVE_CET_USER = 11,
> + FEATURE_XSAVE_CET_SHADOW_STACK = 12,
> + FEATURE_XSAVE_HDC = 13,
> + FEATURE_XSAVE_UINTR = 14,
> + FEATURE_XSAVE_LBR = 15,
> + FEATURE_XSAVE_HWP = 16,
> + FEATURE_XSAVE_XTILE_CFG = 17,
> + FEATURE_XSAVE_XTILE_DATA = 18,
> + FEATURE_MAX,
> + FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,
> + FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,
> +};
> +
> typedef unsigned long elf_greg_t;
>
> #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 33a214b1a4ce..3d1c3c96e34d 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -13,6 +13,7 @@
> #include <linux/seq_file.h>
> #include <linux/proc_fs.h>
> #include <linux/vmalloc.h>
> +#include <linux/coredump.h>
>
> #include <asm/fpu/api.h>
> #include <asm/fpu/regset.h>
> @@ -87,6 +88,8 @@ static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
> #define XSTATE_FLAG_SUPERVISOR BIT(0)
> #define XSTATE_FLAG_ALIGNED64 BIT(1)
>
> +static const char owner_name[] = "LINUX";

This needs to move under the CONFIG_COREDUMP below (so says the build
bots).

> +
> /*
> * Return whether the system supports a given xfeature.
> *
> @@ -1837,3 +1840,141 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
> return 0;
> }
> #endif /* CONFIG_PROC_PID_ARCH_STATUS */
> +
> +#ifdef CONFIG_COREDUMP
> +static int get_sub_leaf(int custom_xfeat)

Why is this "int"? I don't imagine there are negative features?

> +{
> + switch (custom_xfeat) {
> + case FEATURE_XSAVE_YMM: return XFEATURE_YMM;
> + case FEATURE_XSAVE_BNDREGS: return XFEATURE_BNDREGS;
> + case FEATURE_XSAVE_BNDCSR: return XFEATURE_BNDCSR;
> + case FEATURE_XSAVE_OPMASK: return XFEATURE_OPMASK;
> + case FEATURE_XSAVE_ZMM_Hi256: return XFEATURE_ZMM_Hi256;
> + case FEATURE_XSAVE_Hi16_ZMM: return XFEATURE_Hi16_ZMM;
> + case FEATURE_XSAVE_PT: return XFEATURE_PT_UNIMPLEMENTED_SO_FAR;
> + case FEATURE_XSAVE_PKRU: return XFEATURE_PKRU;
> + case FEATURE_XSAVE_PASID: return XFEATURE_PASID;
> + case FEATURE_XSAVE_CET_USER: return XFEATURE_CET_USER;
> + case FEATURE_XSAVE_CET_SHADOW_STACK: return XFEATURE_CET_KERNEL_UNUSED;
> + case FEATURE_XSAVE_HDC: return XFEATURE_RSRVD_COMP_13;
> + case FEATURE_XSAVE_UINTR: return XFEATURE_RSRVD_COMP_14;
> + case FEATURE_XSAVE_LBR: return XFEATURE_LBR;
> + case FEATURE_XSAVE_HWP: return XFEATURE_RSRVD_COMP_16;
> + case FEATURE_XSAVE_XTILE_CFG: return XFEATURE_XTILE_CFG;
> + case FEATURE_XSAVE_XTILE_DATA: return XFEATURE_XTILE_DATA;
> + default:
> + pr_warn_ratelimited("Not a valid XSAVE Feature.");

This isn't very friendly; it's keeping secrets about the unknown value. :)
Also it's missing a newline. How about:

pr_warn_ratelimited("Not a known XSAVE Feature: %u\n",
custom_xfeat);

> + return 0;
> + }
> +}
> +
> +/*
> + * Dump type, size, offset and flag values for every xfeature that is present.
> + */
> +static int dump_xsave_layout_desc(struct coredump_params *cprm)
> +{
> + u32 supported_features = 0;
> + struct xfeat_component xc;
> + u32 eax, ebx, ecx, edx;
> + int num_records = 0;
> + int sub_leaf = 0;
> + int i;
> +
> + /* Find supported extended features */
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + supported_features = eax;
> +
> + for (i = FEATURE_XSAVE_EXTENDED_START;
> + i <= FEATURE_XSAVE_EXTENDED_END; i++) {
> + sub_leaf = get_sub_leaf(i);
> + if (!sub_leaf)
> + continue;
> + if (supported_features & (1U << sub_leaf)) {
> + cpuid_count(XSTATE_CPUID, sub_leaf, &eax, &ebx, &ecx, &edx);
> + xc.xfeat_type = i;
> + xc.xfeat_sz = eax;
> + xc.xfeat_off = ebx;
> + /* Reserved for future use */
> + xc.xfeat_flags = 0;
> +
> + if (!dump_emit(cprm, &xc,
> + sizeof(struct xfeat_component)))
> + return 0;
> + num_records++;
> + }
> + }
> +
> + return num_records;
> +}
> +
> +static int get_xsave_desc_size(void)

This can return u32: never negative.

> +{
> + int supported_features = 0;
> + int xfeatures_count = 0;
> + u32 eax, ebx, ecx, edx;
> + int sub_leaf = 0;
> + int i;

"i" can be u32 and then we can fix the get_sub_leaf() arg type.

> +
> + /* Find supported extended features */
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + supported_features = eax;
> +
> + for (i = FEATURE_XSAVE_EXTENDED_START;
> + i <= FEATURE_XSAVE_EXTENDED_END; i++) {
> + sub_leaf = get_sub_leaf(i);
> + if (!sub_leaf)
> + continue;
> + if (supported_features & (1U << sub_leaf))
> + xfeatures_count++;
> + }
> +
> + return xfeatures_count * (sizeof(struct xfeat_component));
> +}
> +
> +int elf_coredump_extra_notes_write(struct coredump_params *cprm)
> +{
> + int num_records = 0;
> + struct elf_note en;
> +
> + en.n_namesz = sizeof(owner_name);
> + en.n_descsz = get_xsave_desc_size();
> + en.n_type = NT_X86_XSAVE_LAYOUT;
> +
> + if (!dump_emit(cprm, &en, sizeof(en)))
> + return 1;
> + if (!dump_emit(cprm, owner_name, en.n_namesz))
> + return 1;
> + if (!dump_align(cprm, 4))
> + return 1;
> +
> + num_records = dump_xsave_layout_desc(cprm);
> + if (!num_records) {
> + pr_warn_ratelimited("Error adding XSTATE layout ELF note. XSTATE buffer in the core file will be unparseable.");

Missing trailing newline.

> + return 1;
> + }
> +
> + /* Total size should be equal to the number of records */
> + if ((sizeof(struct xfeat_component) * num_records) != en.n_descsz) {
> + pr_warn_ratelimited("Error adding XSTATE layout ELF note. The size of the .note section does not match with the total size of the records.");

Same.

> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Return the size of new note.
> + */
> +int elf_coredump_extra_notes_size(void)
> +{
> + int size = 0;
> +
> + /* NOTE Header */
> + size += sizeof(struct elf_note);
> + /* name + align */
> + size += roundup(sizeof(owner_name), 4);
> + size += get_xsave_desc_size();
> +
> + return size;
> +}
> +#endif

Since it's a long if/endif, add: /* CONFIG_COREDUMP */ after the endif
here.

> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 5397b552fbeb..833bcb7e957b 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -2000,7 +2000,7 @@ static int elf_core_dump(struct coredump_params *cprm)
> {
> size_t sz = info.size;
>
> - /* For cell spufs */
> + /* For cell spufs and x86 xstate */
> sz += elf_coredump_extra_notes_size();
>
> phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
> @@ -2064,7 +2064,7 @@ static int elf_core_dump(struct coredump_params *cprm)
> if (!write_note_info(&info, cprm))
> goto end_coredump;
>
> - /* For cell spufs */
> + /* For cell spufs and x86 xstate */
> if (elf_coredump_extra_notes_write(cprm))
> goto end_coredump;
>
> diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
> index b54b313bcf07..e30a9b47dc87 100644
> --- a/include/uapi/linux/elf.h
> +++ b/include/uapi/linux/elf.h
> @@ -411,6 +411,7 @@ typedef struct elf64_shdr {
> #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
> /* Old binutils treats 0x203 as a CET state */
> #define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
> +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
> #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
> #define NT_S390_TIMER 0x301 /* s390 timer register */
> #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
> --
> 2.34.1
>

Otherwise looks good. I'd like to see feedback from Intel folks too.

Thanks for working on this!

-Kees

--
Kees Cook

2024-05-08 13:03:42

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

On Tue, May 07 2024 at 15:23, Vignesh Balasubramanian wrote:
> +struct xfeat_component {
> + u32 xfeat_type;
> + u32 xfeat_sz;
> + u32 xfeat_off;
> + u32 xfeat_flags;
> +} __packed;

Why repeating xfeat_ for all member names?

u32 type;
u32 size;
u32 offset;
u32 flags;

is sufficient and obvious, no?

> +enum custom_feature {
> + FEATURE_XSAVE_FP = 0,
> + FEATURE_XSAVE_SSE = 1,
> + FEATURE_XSAVE_YMM = 2,
> + FEATURE_XSAVE_BNDREGS = 3,
> + FEATURE_XSAVE_BNDCSR = 4,
> + FEATURE_XSAVE_OPMASK = 5,
> + FEATURE_XSAVE_ZMM_Hi256 = 6,
> + FEATURE_XSAVE_Hi16_ZMM = 7,
> + FEATURE_XSAVE_PT = 8,
> + FEATURE_XSAVE_PKRU = 9,
> + FEATURE_XSAVE_PASID = 10,
> + FEATURE_XSAVE_CET_USER = 11,
> + FEATURE_XSAVE_CET_SHADOW_STACK = 12,
> + FEATURE_XSAVE_HDC = 13,
> + FEATURE_XSAVE_UINTR = 14,
> + FEATURE_XSAVE_LBR = 15,
> + FEATURE_XSAVE_HWP = 16,
> + FEATURE_XSAVE_XTILE_CFG = 17,
> + FEATURE_XSAVE_XTILE_DATA = 18,
> + FEATURE_MAX,
> + FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,
> + FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,
> +};

Why can't this use the existing 'enum xfeature' which is providing
exactly the same information already?

> +#ifdef CONFIG_COREDUMP
> +static int get_sub_leaf(int custom_xfeat)
> +{
> + switch (custom_xfeat) {
> + case FEATURE_XSAVE_YMM: return XFEATURE_YMM;
> + case FEATURE_XSAVE_BNDREGS: return XFEATURE_BNDREGS;
> + case FEATURE_XSAVE_BNDCSR: return XFEATURE_BNDCSR;
> + case FEATURE_XSAVE_OPMASK: return XFEATURE_OPMASK;
> + case FEATURE_XSAVE_ZMM_Hi256: return XFEATURE_ZMM_Hi256;
> + case FEATURE_XSAVE_Hi16_ZMM: return XFEATURE_Hi16_ZMM;
> + case FEATURE_XSAVE_PT: return XFEATURE_PT_UNIMPLEMENTED_SO_FAR;
> + case FEATURE_XSAVE_PKRU: return XFEATURE_PKRU;
> + case FEATURE_XSAVE_PASID: return XFEATURE_PASID;
> + case FEATURE_XSAVE_CET_USER: return XFEATURE_CET_USER;
> + case FEATURE_XSAVE_CET_SHADOW_STACK: return XFEATURE_CET_KERNEL_UNUSED;
> + case FEATURE_XSAVE_HDC: return XFEATURE_RSRVD_COMP_13;
> + case FEATURE_XSAVE_UINTR: return XFEATURE_RSRVD_COMP_14;
> + case FEATURE_XSAVE_LBR: return XFEATURE_LBR;
> + case FEATURE_XSAVE_HWP: return XFEATURE_RSRVD_COMP_16;
> + case FEATURE_XSAVE_XTILE_CFG: return XFEATURE_XTILE_CFG;
> + case FEATURE_XSAVE_XTILE_DATA: return XFEATURE_XTILE_DATA;
> + default:
> + pr_warn_ratelimited("Not a valid XSAVE Feature.");
> + return 0;
> + }
> +}

This function then maps the identical enums one to one. The only actual
"functionality" is the default case and that's completely pointless.

> +/*
> + * Dump type, size, offset and flag values for every xfeature that is present.
> + */
> +static int dump_xsave_layout_desc(struct coredump_params *cprm)
> +{
> + u32 supported_features = 0;
> + struct xfeat_component xc;
> + u32 eax, ebx, ecx, edx;
> + int num_records = 0;
> + int sub_leaf = 0;
> + int i;
> +
> + /* Find supported extended features */
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + supported_features = eax;

Why does this need to re-evaluate CPUID instead of just using the
existing fpu_user_cfg.max_features?

> + for (i = FEATURE_XSAVE_EXTENDED_START;
> + i <= FEATURE_XSAVE_EXTENDED_END; i++) {

Please use the full 100 character line width.

> + sub_leaf = get_sub_leaf(i);
> + if (!sub_leaf)
> + continue;
> + if (supported_features & (1U << sub_leaf)) {
> + cpuid_count(XSTATE_CPUID, sub_leaf, &eax, &ebx, &ecx, &edx);
> + xc.xfeat_type = i;
> + xc.xfeat_sz = eax;
> + xc.xfeat_off = ebx;
> + /* Reserved for future use */
> + xc.xfeat_flags = 0;
> +
> + if (!dump_emit(cprm, &xc,
> + sizeof(struct xfeat_component)))

sizeof(xc), no?

> + return 0;
> + num_records++;
> + }
> + }

This whole thing can be written as:

for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
struct xfeat_component xc = {
.type = i,
.size = xstate_sizes[i],
.offset = xstate_offsets[i],
};

if (!dump_emit(cprm, &xc, sizeof(xc)))
return 0;
num_records++;
}

It omits the features which are supported by the CPU, but not enabled by
the kernel. That's perfectly fine because:

1) the corresponding xfeature bits of those component in the actual
XSAVE dump are guaranteed to be zero

2) the corresponding regions in the actual XSAVE dump are zeroed

So there is absolutely no point in having notes for the not enabled
features at all.

Hmm?

> +
> + return num_records;
> +}
> +
> +static int get_xsave_desc_size(void)
> +{
> + int supported_features = 0;
> + int xfeatures_count = 0;
> + u32 eax, ebx, ecx, edx;
> + int sub_leaf = 0;
> + int i;
> +
> + /* Find supported extended features */
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + supported_features = eax;
> +
> + for (i = FEATURE_XSAVE_EXTENDED_START;
> + i <= FEATURE_XSAVE_EXTENDED_END; i++) {
> + sub_leaf = get_sub_leaf(i);
> + if (!sub_leaf)
> + continue;
> + if (supported_features & (1U << sub_leaf))
> + xfeatures_count++;
> + }
> + return xfeatures_count * (sizeof(struct xfeat_component));

Then this can be replaced by:

int i, cnt = 0;

for_each_extended_xfeature(i, fpu_user_cfg.max_features)
cnt++;

return cnt * sizeof(struct xfeat_component);

In fact the number of extended features can be calculated once during
boot during xstate initialization.

No?

> +}
> +
> +int elf_coredump_extra_notes_write(struct coredump_params *cprm)
> +{
> + int num_records = 0;
> + struct elf_note en;
> +
> + en.n_namesz = sizeof(owner_name);
> + en.n_descsz = get_xsave_desc_size();
> + en.n_type = NT_X86_XSAVE_LAYOUT;
> +
> + if (!dump_emit(cprm, &en, sizeof(en)))
> + return 1;
> + if (!dump_emit(cprm, owner_name, en.n_namesz))
> + return 1;
> + if (!dump_align(cprm, 4))
> + return 1;
> +
> + num_records = dump_xsave_layout_desc(cprm);
> + if (!num_records) {
> + pr_warn_ratelimited("Error adding XSTATE layout ELF note. XSTATE buffer in the core file will be unparseable.");
> + return 1;

This is going to trigger on all systems which do not support XSAVE. So
why emitting this note in the first place on such systems?

The function should have

if (!fpu_kernel_cfg.max_features)
return 0;

right at the beginning.

Aside of that, these warnings are pointless noise in the case that
dump_emit() caused the function to return early. Dumps can be truncated.

> +/*
> + * Return the size of new note.

Which new note? This is extra notes, no?

> + */
> +int elf_coredump_extra_notes_size(void)
> +{
> + int size = 0;

int size;

> +
> + /* NOTE Header */

Note header ?

> + size += sizeof(struct elf_note);

size = ....

> + /* name + align */

Name plus alignment to 4 bytes ?

> + size += roundup(sizeof(owner_name), 4);
> + size += get_xsave_desc_size();
> +
> + return size;
> +}

And like the write function this wants:

if (!fpu_kernel_cfg.max_features)
return 0;

at the beginning. No point in emitting useless notes and headers.

Thanks,

tglx

2024-05-22 13:13:25

by Balasubrmanian, Vignesh

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files


On 5/8/2024 6:32 PM, Thomas Gleixner wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> On Tue, May 07 2024 at 15:23, Vignesh Balasubramanian wrote:
>> +struct xfeat_component {
>> + u32 xfeat_type;
>> + u32 xfeat_sz;
>> + u32 xfeat_off;
>> + u32 xfeat_flags;
>> +} __packed;
> Why repeating xfeat_ for all member names?
>
> u32 type;
> u32 size;
> u32 offset;
> u32 flags;
>
> is sufficient and obvious, no?
>
>> +enum custom_feature {
>> + FEATURE_XSAVE_FP = 0,
>> + FEATURE_XSAVE_SSE = 1,
>> + FEATURE_XSAVE_YMM = 2,
>> + FEATURE_XSAVE_BNDREGS = 3,
>> + FEATURE_XSAVE_BNDCSR = 4,
>> + FEATURE_XSAVE_OPMASK = 5,
>> + FEATURE_XSAVE_ZMM_Hi256 = 6,
>> + FEATURE_XSAVE_Hi16_ZMM = 7,
>> + FEATURE_XSAVE_PT = 8,
>> + FEATURE_XSAVE_PKRU = 9,
>> + FEATURE_XSAVE_PASID = 10,
>> + FEATURE_XSAVE_CET_USER = 11,
>> + FEATURE_XSAVE_CET_SHADOW_STACK = 12,
>> + FEATURE_XSAVE_HDC = 13,
>> + FEATURE_XSAVE_UINTR = 14,
>> + FEATURE_XSAVE_LBR = 15,
>> + FEATURE_XSAVE_HWP = 16,
>> + FEATURE_XSAVE_XTILE_CFG = 17,
>> + FEATURE_XSAVE_XTILE_DATA = 18,
>> + FEATURE_MAX,
>> + FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,
>> + FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,
>> +};
> Why can't this use the existing 'enum xfeature' which is providing
> exactly the same information already?
First version of patch was similar to what you mentioned here and other
review comments to use existing kernel definitions.
https://lore.kernel.org/linux-mm/[email protected]/T/

As per the review comment
https://lore.kernel.org/linux-mm/20240314162954.GAZfMmAnYQoRjRbRzc@fat_crate.local/
, modified the patch to be a independent of kernel internal definitions.
Though this enum and below function  "get_sub_leaf" are not useful now, 
it will be required when we extend for a new/different features.

Please let  us know your suggestions.

I will fix all other review comments in my next version.

>> +#ifdef CONFIG_COREDUMP
>> +static int get_sub_leaf(int custom_xfeat)
>> +{
>> + switch (custom_xfeat) {
>> + case FEATURE_XSAVE_YMM: return XFEATURE_YMM;
>> + case FEATURE_XSAVE_BNDREGS: return XFEATURE_BNDREGS;
>> + case FEATURE_XSAVE_BNDCSR: return XFEATURE_BNDCSR;
>> + case FEATURE_XSAVE_OPMASK: return XFEATURE_OPMASK;
>> + case FEATURE_XSAVE_ZMM_Hi256: return XFEATURE_ZMM_Hi256;
>> + case FEATURE_XSAVE_Hi16_ZMM: return XFEATURE_Hi16_ZMM;
>> + case FEATURE_XSAVE_PT: return XFEATURE_PT_UNIMPLEMENTED_SO_FAR;
>> + case FEATURE_XSAVE_PKRU: return XFEATURE_PKRU;
>> + case FEATURE_XSAVE_PASID: return XFEATURE_PASID;
>> + case FEATURE_XSAVE_CET_USER: return XFEATURE_CET_USER;
>> + case FEATURE_XSAVE_CET_SHADOW_STACK: return XFEATURE_CET_KERNEL_UNUSED;
>> + case FEATURE_XSAVE_HDC: return XFEATURE_RSRVD_COMP_13;
>> + case FEATURE_XSAVE_UINTR: return XFEATURE_RSRVD_COMP_14;
>> + case FEATURE_XSAVE_LBR: return XFEATURE_LBR;
>> + case FEATURE_XSAVE_HWP: return XFEATURE_RSRVD_COMP_16;
>> + case FEATURE_XSAVE_XTILE_CFG: return XFEATURE_XTILE_CFG;
>> + case FEATURE_XSAVE_XTILE_DATA: return XFEATURE_XTILE_DATA;
>> + default:
>> + pr_warn_ratelimited("Not a valid XSAVE Feature.");
>> + return 0;
>> + }
>> +}
> This function then maps the identical enums one to one. The only actual
> "functionality" is the default case and that's completely pointless.
thanks,
vigneshbalu.

2024-05-22 15:35:11

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

On Wed, May 22, 2024 at 06:42:55PM +0530, Balasubrmanian, Vignesh wrote:
> > > +enum custom_feature {
> > > + FEATURE_XSAVE_FP = 0,
> > > + FEATURE_XSAVE_SSE = 1,
> > > + FEATURE_XSAVE_YMM = 2,
> > > + FEATURE_XSAVE_BNDREGS = 3,
> > > + FEATURE_XSAVE_BNDCSR = 4,
> > > + FEATURE_XSAVE_OPMASK = 5,
> > > + FEATURE_XSAVE_ZMM_Hi256 = 6,
> > > + FEATURE_XSAVE_Hi16_ZMM = 7,
> > > + FEATURE_XSAVE_PT = 8,
> > > + FEATURE_XSAVE_PKRU = 9,
> > > + FEATURE_XSAVE_PASID = 10,
> > > + FEATURE_XSAVE_CET_USER = 11,
> > > + FEATURE_XSAVE_CET_SHADOW_STACK = 12,
> > > + FEATURE_XSAVE_HDC = 13,
> > > + FEATURE_XSAVE_UINTR = 14,
> > > + FEATURE_XSAVE_LBR = 15,
> > > + FEATURE_XSAVE_HWP = 16,
> > > + FEATURE_XSAVE_XTILE_CFG = 17,
> > > + FEATURE_XSAVE_XTILE_DATA = 18,
> > > + FEATURE_MAX,
> > > + FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,
> > > + FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,
> > > +};
> > Why can't this use the existing 'enum xfeature' which is providing
> > exactly the same information already?
> First version of patch was similar to what you mentioned here and other
> review comments to use existing kernel definitions.
> https://lore.kernel.org/linux-mm/[email protected]/T/
>
> As per the review comment https://lore.kernel.org/linux-mm/20240314162954.GAZfMmAnYQoRjRbRzc@fat_crate.local/
> , modified the patch to be a independent of kernel internal definitions.
> Though this enum and below function  "get_sub_leaf" are not useful now,  it
> will be required when we extend for a new/different features.

No, Thomas' sugggestion is to use the existing xfeature enum - not
define the same thing again.

Why do you need that enum custom_feature thing if you can use

/*
* List of XSAVE features Linux knows about:
*/
enum xfeature {

from arch/x86/include/asm/fpu/types.h

?

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2024-05-23 14:52:57

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

On Thu, May 23, 2024 at 11:57:00AM +0530, Balasubrmanian, Vignesh wrote:
> Currently, this enum is the same as XSAVE, but when we add other features, this
> enum might have a different value of the XSAVE features and can be modified
> without disturbing the existing kernel code.

We will do that when we cross that bridge, right?

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2024-05-26 04:55:17

by Balasubrmanian, Vignesh

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files


On 5/23/2024 8:15 PM, Borislav Petkov wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> On Thu, May 23, 2024 at 11:57:00AM +0530, Balasubrmanian, Vignesh wrote:
>> Currently, this enum is the same as XSAVE, but when we add other features, this
>> enum might have a different value of the XSAVE features and can be modified
>> without disturbing the existing kernel code.
> We will do that when we cross that bridge, right?

I am struggling to interpret.
If we can add a new enum only when we extend, then as Thomas suggested
can we use other kernel variables as in the first version of the patch
until we extend for other/new features?

thanks,
vigneshbalu.

>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette

2024-05-26 09:06:42

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

On Sun, May 26, 2024 at 10:24:41AM +0530, Balasubrmanian, Vignesh wrote:
> If we can add a new enum only when we extend, then as Thomas suggested can
> we use other kernel variables as in the first version of the patch until we
> extend for other/new features?

I assume by "other kernel variables" you mean CPUID?

If so, can you change the layout of your buffer once you export it to
userspace?

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2024-05-31 09:20:03

by Balasubrmanian, Vignesh

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files


On 5/26/2024 2:35 PM, Borislav Petkov wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> On Sun, May 26, 2024 at 10:24:41AM +0530, Balasubrmanian, Vignesh wrote:
>> If we can add a new enum only when we extend, then as Thomas suggested can
>> we use other kernel variables as in the first version of the patch until we
>> extend for other/new features?
> I assume by "other kernel variables" you mean CPUID?
>
> If so, can you change the layout of your buffer once you export it to
> userspace?
In a couple of the previous review mails
(https://lore.kernel.org/lkml/[email protected]/
and
https://lore.kernel.org/lkml/20240314162954.GAZfMmAnYQoRjRbRzc@fat_crate.local/),
it was suggested that the new .note should not use any internal definitions
like "xstate_sizes", "xstate_offsets" and ""xstate_flags" which are also the
direct output of cpuid instruction.

Also, the feature ID in .note records should be independent of the existing
XSAVE feature IDs (this was the comment as I understood). I defined the
new enum
and mapping function to ensure that these remain independent of each other.

Thomas' comments on this version are that we should use existing variables
instead of re-evaluating cpuid. Also, to avoid the new enum and mapping
function which will make not any sense unless it is extended for
newer/different features.

That will be like our first version of the patch
https://lore.kernel.org/lkml/[email protected]/

So other than with the new enum (custom_feature) and the new mapping
function
(get_sub_leaf), we are unsure as to how to maintain the layout to be
independent of x86' cpuid.

In the current version of the patch, the fields -- type, size, and offset
are derived from the cpuid instruction currently (and could be derived from
existing kernel variables in the future). The xsave flags are not used
currently, it can be zero(reserved) for now and its layout can be modified
(as per the need at that time) when the need arises.

If there are other ways to maintain the independence of the layout of a
record
in .note section from the cpuid instruction other than depending on a
new enum
and a new mapping function, we would be glad to follow it.

thanks
vigneshbalu
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette

2024-05-31 10:59:27

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

Ok,

I went and worked in tglx's comments. This is what this should look
like.

Only build-tested.

---
From cf23110f5cc24b6072ba7a26f31cff3ed486e6b3 Mon Sep 17 00:00:00 2001
From: Vignesh Balasubramanian <[email protected]>
Date: Tue, 7 May 2024 15:23:31 +0530
Subject: [PATCH] x86/elf: Add a new .note section containing xfeatures buffer
layout info to x86 core files

Add a new .note section containing type, size, offset and flags of every
xfeature that is present.

This information will be used by debuggers to understand the XSAVE
layout of the machine where the core file has been dumped, and to read
XSAVE registers, especially during cross-platform debugging.

The XSAVE layouts of modern AMD and Intel CPUs differ, especially since
Memory Protection Keys and the AVX-512 features have been inculcated into
the AMD CPUs.

Since AMD never adopted (and hence never left room in the XSAVE
layout for) the Intel MPX feature. Tools like GDB had assumed a fixed
XSAVE layout matching that of Intel (based on the XCR0 mask).

Hence, core dumps from AMD CPUs didn't match the known size for the
XCR0 mask. This resulted in GDB and other tools not being able to access
the values of the AVX-512 and PKRU registers on AMD CPUs.

To solve this, an interim solution has been accepted into GDB, and is
already a part of GDB 14, see
https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html.

But it depends on heuristics based on the total XSAVE register set size
and the XCR0 mask to infer the layouts of the various register blocks
for core dumps, and hence, is not a foolproof mechanism to determine the
layout of the XSAVE area.

Therefore, add a new core dump note in order to allow GDB/LLDB and other
relevant tools to determine the layout of the XSAVE area of the machine
where the corefile was dumped.

The new core dump note (which is being proposed as a per-process .note
section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures.

Each structure describes an individual extended feature containing
offset, size and flags in this format:

struct xfeat_component {
u32 type;
u32 size;
u32 offset;
u32 flags;
};

and in an independent manner, allowing for future extensions without
depending on hw arch specifics like CPUID etc.

[ bp: Work in tglx' comments from
https://lore.kernel.org/r/87wmo4o3r4.ffs@tglx, massage. ]

Co-developed-by: Jini Susan George <[email protected]>
Signed-off-by: Jini Susan George <[email protected]>
Signed-off-by: Vignesh Balasubramanian <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/elf.h | 9 ++++
arch/x86/kernel/fpu/xstate.c | 84 ++++++++++++++++++++++++++++++++++++
fs/binfmt_elf.c | 4 +-
include/uapi/linux/elf.h | 1 +
5 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e30ea4129d2c..46e44b087c94 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,6 +107,7 @@ config X86
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAVE_EXTRA_ELF_NOTES
select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1fb83d47711f..cad37090bbd3 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -13,6 +13,15 @@
#include <asm/auxvec.h>
#include <asm/fsgsbase.h>

+struct xfeat_component {
+ u32 type;
+ u32 size;
+ u32 offset;
+ u32 flags;
+} __packed;
+
+_Static_assert(sizeof(struct xfeat_component)%4 == 0, "xfeat_component is not aligned");
+
typedef unsigned long elf_greg_t;

#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c5a026fee5e0..4c26f119c0d6 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -13,6 +13,7 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
+#include <linux/coredump.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
@@ -1838,3 +1839,86 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_COREDUMP
+static const char owner_name[] = "LINUX";
+
+/*
+ * Dump type, size, offset and flag values for every xfeature that is present.
+ */
+static int dump_xsave_layout_desc(struct coredump_params *cprm)
+{
+ int num_records = 0;
+ int i;
+
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
+ struct xfeat_component xc = {
+ .type = i,
+ .size = xstate_sizes[i],
+ .offset = xstate_offsets[i],
+ /* reserved for future use */
+ .flags = 0,
+ };
+
+ if (!dump_emit(cprm, &xc, sizeof(xc)))
+ return 0;
+
+ num_records++;
+ }
+ return num_records;
+}
+
+static int get_xsave_desc_size(void)
+{
+ int cnt = 0;
+ int i;
+
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
+ cnt++;
+
+ return cnt * (sizeof(struct xfeat_component));
+}
+
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+ int num_records = 0;
+ struct elf_note en;
+
+ if (!fpu_kernel_cfg.max_features)
+ return 0;
+
+ en.n_namesz = sizeof(owner_name);
+ en.n_descsz = get_xsave_desc_size();
+ en.n_type = NT_X86_XSAVE_LAYOUT;
+
+ if (!dump_emit(cprm, &en, sizeof(en)))
+ return 1;
+ if (!dump_emit(cprm, owner_name, en.n_namesz))
+ return 1;
+ if (!dump_align(cprm, 4))
+ return 1;
+
+ num_records = dump_xsave_layout_desc(cprm);
+ if (!num_records)
+ return 1;
+
+ /* Total size should be equal to the number of records */
+ if ((sizeof(struct xfeat_component) * num_records) != en.n_descsz)
+ return 1;
+
+ return 0;
+}
+
+int elf_coredump_extra_notes_size(void)
+{
+ int size;
+
+ /* .note header */
+ size = sizeof(struct elf_note);
+ /* name + align */
+ size += roundup(sizeof(owner_name), 4);
+ size += get_xsave_desc_size();
+
+ return size;
+}
+#endif
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a43897b03ce9..3d15c7369b29 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2006,7 +2006,7 @@ static int elf_core_dump(struct coredump_params *cprm)
{
size_t sz = info.size;

- /* For cell spufs */
+ /* For cell spufs and x86 xstate */
sz += elf_coredump_extra_notes_size();

phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
@@ -2070,7 +2070,7 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!write_note_info(&info, cprm))
goto end_coredump;

- /* For cell spufs */
+ /* For cell spufs and x86 xstate */
if (elf_coredump_extra_notes_write(cprm))
goto end_coredump;

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b54b313bcf07..e30a9b47dc87 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -411,6 +411,7 @@ typedef struct elf64_shdr {
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
/* Old binutils treats 0x203 as a CET state */
#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
#define NT_S390_TIMER 0x301 /* s390 timer register */
#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
--
2.43.0


--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette