2016-10-12 18:01:35

by Piotr Luc

[permalink] [raw]
Subject: [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.

AVX512_4VNNIW - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors.

The spec can be found in Intel Software Developer Manual or in
Instruction Set Extensions Programming Reference. See
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

Signed-off-by: Piotr Luc <[email protected]>
Reviewed-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 6 +++++-
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kernel/fpu/xstate.c | 2 ++
tools/arch/x86/include/asm/cpufeatures.h | 6 +++++-
tools/arch/x86/include/asm/disabled-features.h | 3 ++-
tools/arch/x86/include/asm/required-features.h | 3 ++-
9 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69f..617452e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@ enum cpuid_leafs
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
};

#ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))

#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))

#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..309fd2d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
@@ -286,6 +286,10 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */

+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bcc9ccc..6e6189f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,6 +675,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)

c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}

/* Extended state features: level 0x0000000d */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa..7dbd480 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,6 +73,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_PKU);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
}

/*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..309fd2d 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
@@ -286,6 +286,10 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */

+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
/*
* BUG word(s)
*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
--
2.10.1


Subject: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

Commit-ID: a518dcc82b6162009c8ca3d169fe61c81536ff17
Gitweb: http://git.kernel.org/tip/a518dcc82b6162009c8ca3d169fe61c81536ff17
Author: Piotr Luc <[email protected]>
AuthorDate: Wed, 12 Oct 2016 19:57:31 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 16 Oct 2016 11:32:11 +0200

x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

AVX512_4VNNIW - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors.

The spec can be found in Intel Software Developer Manual or in
Instruction Set Extensions Programming Reference. See
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

Signed-off-by: Piotr Luc <[email protected]>
Reviewed-by: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 6 +++++-
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kernel/fpu/xstate.c | 2 ++
tools/arch/x86/include/asm/cpufeatures.h | 6 +++++-
tools/arch/x86/include/asm/disabled-features.h | 3 ++-
tools/arch/x86/include/asm/required-features.h | 3 ++-
9 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69f..617452e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@ enum cpuid_leafs
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
};

#ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))

#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))

#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc8..6697b75 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
@@ -285,6 +285,10 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */

+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bd910a..424a620 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,6 +675,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)

c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}

/* Extended state features: level 0x0000000d */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c..095ef7d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_PKU);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
}

/*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1188bc8..6697b75 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
@@ -285,6 +285,10 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */

+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
/*
* BUG word(s)
*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */

2016-10-16 14:14:24

by Borislav Petkov

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Sun, Oct 16, 2016 at 04:21:49AM -0700, tip-bot for Piotr Luc wrote:
> Commit-ID: a518dcc82b6162009c8ca3d169fe61c81536ff17
> Gitweb: http://git.kernel.org/tip/a518dcc82b6162009c8ca3d169fe61c81536ff17
> Author: Piotr Luc <[email protected]>
> AuthorDate: Wed, 12 Oct 2016 19:57:31 +0200
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Sun, 16 Oct 2016 11:32:11 +0200
>
> x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
>
> AVX512_4VNNIW - Vector instructions for deep learning enhanced word
> variable precision.
> AVX512_4FMAPS - Vector instructions for deep learning floating-point
> single precision.
>
> The new instructions are to be used in future Intel Xeon & Xeon Phi
> processors.
>
> The spec can be found in Intel Software Developer Manual or in
> Instruction Set Extensions Programming Reference. See
> https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.
>
> Signed-off-by: Piotr Luc <[email protected]>
> Reviewed-by: Dave Hansen <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Cc: Brian Gerst <[email protected]>
> Cc: Denys Vlasenko <[email protected]>
> Cc: H. Peter Anvin <[email protected]>
> Cc: Josh Poimboeuf <[email protected]>
> Cc: Linus Torvalds <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Link: http://lkml.kernel.org/r/[email protected]
> Signed-off-by: Ingo Molnar <[email protected]>

...

> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 1188bc8..6697b75 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -12,7 +12,7 @@
> /*
> * Defines x86 CPU feature bits
> */
> -#define NCAPINTS 18 /* N 32-bit words worth of info */
> +#define NCAPINTS 19 /* N 32-bit words worth of info */
> #define NBUGINTS 1 /* N 32-bit bug flags */
>
> /*
> @@ -285,6 +285,10 @@
> #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
> #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
>
> +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
> +#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural Network Instructions */
> +#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply Accumulation Single precision */

This is getting ridiculous: we keep adding new leafs to
->x86_capability, thus bloating cpuinfo_x86 but then it is not even
worth it - this patch defines only two bits.

I know, I know, it is a CPUID leaf of features, we will need them, yadda
yadda but until we do, I'd suggest these all new feature bits to to
init_scattered_cpuid_features() and be carved out to a leaf of their
own *only* when we really, actually add them and fill up that leaf.
Otherwise, we have one fat and sparse x86_capability array.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.

2016-10-16 16:05:52

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On October 16, 2016 7:22:33 AM PDT, Borislav Petkov <[email protected]> wrote:
>On Sun, Oct 16, 2016 at 04:21:49AM -0700, tip-bot for Piotr Luc wrote:
>> Commit-ID: a518dcc82b6162009c8ca3d169fe61c81536ff17
>> Gitweb:
>http://git.kernel.org/tip/a518dcc82b6162009c8ca3d169fe61c81536ff17
>> Author: Piotr Luc <[email protected]>
>> AuthorDate: Wed, 12 Oct 2016 19:57:31 +0200
>> Committer: Ingo Molnar <[email protected]>
>> CommitDate: Sun, 16 Oct 2016 11:32:11 +0200
>>
>> x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
>>
>> AVX512_4VNNIW - Vector instructions for deep learning enhanced word
>> variable precision.
>> AVX512_4FMAPS - Vector instructions for deep learning floating-point
>> single precision.
>>
>> The new instructions are to be used in future Intel Xeon & Xeon Phi
>> processors.
>>
>> The spec can be found in Intel Software Developer Manual or in
>> Instruction Set Extensions Programming Reference. See
>>
>https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.
>>
>> Signed-off-by: Piotr Luc <[email protected]>
>> Reviewed-by: Dave Hansen <[email protected]>
>> Cc: Andy Lutomirski <[email protected]>
>> Cc: Borislav Petkov <[email protected]>
>> Cc: Brian Gerst <[email protected]>
>> Cc: Denys Vlasenko <[email protected]>
>> Cc: H. Peter Anvin <[email protected]>
>> Cc: Josh Poimboeuf <[email protected]>
>> Cc: Linus Torvalds <[email protected]>
>> Cc: Peter Zijlstra <[email protected]>
>> Cc: Thomas Gleixner <[email protected]>
>> Link:
>http://lkml.kernel.org/r/[email protected]
>> Signed-off-by: Ingo Molnar <[email protected]>
>
>...
>
>> diff --git a/arch/x86/include/asm/cpufeatures.h
>b/arch/x86/include/asm/cpufeatures.h
>> index 1188bc8..6697b75 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -12,7 +12,7 @@
>> /*
>> * Defines x86 CPU feature bits
>> */
>> -#define NCAPINTS 18 /* N 32-bit words worth of info */
>> +#define NCAPINTS 19 /* N 32-bit words worth of info */
>> #define NBUGINTS 1 /* N 32-bit bug flags */
>>
>> /*
>> @@ -285,6 +285,10 @@
>> #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error
>containment and recovery */
>> #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
>>
>> +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word
>18 */
>> +#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural
>Network Instructions */
>> +#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply
>Accumulation Single precision */
>
>This is getting ridiculous: we keep adding new leafs to
>->x86_capability, thus bloating cpuinfo_x86 but then it is not even
>worth it - this patch defines only two bits.
>
>I know, I know, it is a CPUID leaf of features, we will need them,
>yadda
>yadda but until we do, I'd suggest these all new feature bits to to
>init_scattered_cpuid_features() and be carved out to a leaf of their
>own *only* when we really, actually add them and fill up that leaf.
>Otherwise, we have one fat and sparse x86_capability array.

No, please. That would be worse than the disease.
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

2016-10-16 16:30:06

by Borislav Petkov

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Sun, Oct 16, 2016 at 09:02:51AM -0700, [email protected] wrote:
> No, please. That would be worse than the disease.

Why not?

I did that recently with a bunch of leaves and there were no issues:

2ccd71f1b278 ("x86/cpufeature: Move some of the scattered feature bits to x86_capability")

There it obviously made sense for 0x00000006 and 0x8000000a to have a
separate ->x86_capability leaf.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.

2016-10-16 18:43:35

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On October 16, 2016 9:35:57 AM PDT, Borislav Petkov <[email protected]> wrote:
>On Sun, Oct 16, 2016 at 09:02:51AM -0700, [email protected] wrote:
>> No, please. That would be worse than the disease.
>
>Why not?
>
>I did that recently with a bunch of leaves and there were no issues:
>
>2ccd71f1b278 ("x86/cpufeature: Move some of the scattered feature bits
>to x86_capability")
>
>There it obviously made sense for 0x00000006 and 0x8000000a to have a
>separate ->x86_capability leaf.

It's needlessly adding complexity for no reason, at least for the leaves that are going to add bits over time. The x86_capability array is not an expensive resource.
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

2016-10-16 22:34:51

by Borislav Petkov

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Sun, Oct 16, 2016 at 11:42:26AM -0700, [email protected] wrote:
> It's needlessly adding complexity for no reason, at least for the

What complexity? The init_scattered_cpuid_features() version is a
trivial patch in comparison to the current version.

> leaves that are going to add bits over time.

Sure, except they don't get added or we don't need them or whatever, and
we end up with only a small number of bits actually being used.

I don't mind moving them to x86_capability later, when a high percentage
of the respective leaf is actually being used but not for a couple of
bits. That's just waste.

> The x86_capability array is not an expensive resource.

0.1% here, 0.1% there, the creeping bloat thing.

And again, the init_scattered_cpuid_features() hunk is much smaller.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.

2016-10-17 07:58:57

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Sun, 16 Oct 2016, Borislav Petkov wrote:
> > The spec can be found in Intel Software Developer Manual or in
> > Instruction Set Extensions Programming Reference. See
> > https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.
>
> > +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
> > +#define X86_FEATURE_AVX512_4VNNIW (18*32+2) /* AVX-512 Neural Network Instructions */
> > +#define X86_FEATURE_AVX512_4FMAPS (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
>
> This is getting ridiculous: we keep adding new leafs to
> ->x86_capability, thus bloating cpuinfo_x86 but then it is not even
> worth it - this patch defines only two bits.

What's worse is that the Instruction Set Extensions Programming Reference
manual says:

CPUID.(EAX=07H, ECX=0):EDX[bit 02] AVX512_4FMAPS
CPUID.(EAX=07H, ECX=0):EBX[bit 03] AVX512_4VNNIW

So AVX512_4VNNIW is in EBX not EDX. What's correct here? The manual or the patch?

I'm going to zap it.

Thanks,

tglx

2016-10-17 08:23:03

by Piotr Luc

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Mon, 2016-10-17 at 09:55 +0200, Thomas Gleixner wrote:
> On Sun, 16 Oct 2016, Borislav Petkov wrote:
> >
> > >
> > > The spec can be found in Intel Software Developer Manual or in
> > > Instruction Set Extensions Programming Reference. See
> > > https://software.intel.com/sites/default/files/managed/69/78/3194
> > > 33-025.pdf.
> >
> > >
> > > +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx),
> > > word 18 */
> > > +#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural
> > > Network Instructions */
> > > +#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply
> > > Accumulation Single precision */
> >
> > This is getting ridiculous: we keep adding new leafs to
> > ->x86_capability, thus bloating cpuinfo_x86 but then it is not even
> > worth it - this patch defines only two bits.
>
> What's worse is that the Instruction Set Extensions Programming
> Reference
> manual says:
>
> CPUID.(EAX=07H, ECX=0):EDX[bit 02] AVX512_4FMAPS
> CPUID.(EAX=07H, ECX=0):EBX[bit 03] AVX512_4VNNIW
>
> So AVX512_4VNNIW is in EBX not EDX. What's correct here? The manual
> or the patch?
>
> I'm going to zap it.
>
The manual contains the typo in  table 2.1 on page 2.2.
Please compare it to the detailed description of CPUID in table 4.8 on
page 2-16.
There manual groups both new bits under EDX:

EDX    Bits 01 - 00: Reserved
       Bit 02: AVX512_4VNNIW (Vector instructions for deep learning
enhanced word variable precision.)
       Bit 03: AVX512_4FMAPS (Vector instructions for deep learning
floating-point single precision.)
       Bits 31-04: Reserved

The typo was acknowledged and is going to be fixed in next version of
the document. 

Regards,
Piotr

2016-10-17 08:47:58

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features


* Luc, Piotr <[email protected]> wrote:

> On Mon, 2016-10-17 at 09:55 +0200, Thomas Gleixner wrote:
> > On Sun, 16 Oct 2016, Borislav Petkov wrote:
> > >
> > > >
> > > > The spec can be found in Intel Software Developer Manual or in
> > > > Instruction Set Extensions Programming Reference. See
> > > > https://software.intel.com/sites/default/files/managed/69/78/3194
> > > > 33-025.pdf.
> > >
> > > >
> > > > +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx),
> > > > word 18 */
> > > > +#define X86_FEATURE_AVX512_4VNNIW??(18*32+2) /* AVX-512 Neural
> > > > Network Instructions */
> > > > +#define X86_FEATURE_AVX512_4FMAPS??(18*32+3) /* AVX-512 Multiply
> > > > Accumulation Single precision */
> > >
> > > This is getting ridiculous: we keep adding new leafs to
> > > ->x86_capability, thus bloating cpuinfo_x86 but then it is not even
> > > worth it - this patch defines only two bits.
> >
> > What's worse is that the Instruction Set Extensions Programming
> > Reference
> > manual says:
> >
> > CPUID.(EAX=07H, ECX=0):EDX[bit 02] AVX512_4FMAPS
> > CPUID.(EAX=07H, ECX=0):EBX[bit 03] AVX512_4VNNIW
> >
> > So AVX512_4VNNIW is in EBX not EDX. What's correct here? The manual
> > or the patch?
> >
> > I'm going to zap it.
> >
> The manual contains the typo in ?table 2.1 on page 2.2.
> Please compare it to the detailed description of CPUID in table 4.8 on
> page 2-16.
> There manual groups both new bits under EDX:
>
> EDX ? ?Bits 01 - 00: Reserved
> ? ? ? ?Bit 02: AVX512_4VNNIW (Vector instructions for deep learning
> enhanced word variable precision.)
> ? ? ? ?Bit 03: AVX512_4FMAPS (Vector instructions for deep learning
> floating-point single precision.)
> ? ? ? ?Bits 31-04: Reserved
>
> The typo was acknowledged and is going to be fixed in next version of
> the document.?

All of this should be pointed out in the changelog.

I've zapped the commit for the time being - let's iterate this once more, ok?

Thanks,

Ingo

2016-10-17 09:53:53

by Piotr Luc

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Mon, 2016-10-17 at 10:47 +0200, Ingo Molnar wrote:
> * Luc, Piotr <[email protected]> wrote:
>
> >
> > On Mon, 2016-10-17 at 09:55 +0200, Thomas Gleixner wrote:
> > >

> > The typo was acknowledged and is going to be fixed in next version
> > of
> > the document. 
>
> All of this should be pointed out in the changelog.
>
> I've zapped the commit for the time being - let's iterate this once
> more, ok?
>

OK, I will add appropriate info.

What about moving initialization to init_scattered_cpuid_features()?

Regards,
Piotr


2016-10-17 09:55:21

by Piotr Luc

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Mon, 2016-10-17 at 00:42 +0200, Borislav Petkov wrote:
> On Sun, Oct 16, 2016 at 11:42:26AM -0700, [email protected] wrote:
> >
> > It's needlessly adding complexity for no reason, at least for the
>
> What complexity? The init_scattered_cpuid_features() version is a
> trivial patch in comparison to the current version.
>
> >
> > leaves that are going to add bits over time.
>
> Sure, except they don't get added or we don't need them or whatever,
> and
> we end up with only a small number of bits actually being used.
>
> I don't mind moving them to x86_capability later, when a high
> percentage
> of the respective leaf is actually being used but not for a couple of
> bits. That's just waste.
>
> >
> > The x86_capability array is not an expensive resource.
>
> 0.1% here, 0.1% there, the creeping bloat thing.
>
> And again, the init_scattered_cpuid_features() hunk is much smaller.
>
I agree, the scattered solution reduces data segment footprint in case
many cores.

Regards,
Piotr

2016-10-17 09:57:01

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

On Mon, 17 Oct 2016, Luc, Piotr wrote:
> On Mon, 2016-10-17 at 10:47 +0200, Ingo Molnar wrote:
> > * Luc, Piotr <[email protected]> wrote:
> > >
> > > The typo was acknowledged and is going to be fixed in next version
> > > of the document.
> >
> > All of this should be pointed out in the changelog.
> >
> > I've zapped the commit for the time being - let's iterate this once
> > more, ok?
> >
>
> OK, I will add appropriate info.
>
> What about moving initialization to init_scattered_cpuid_features()?

Yes, please. We can move it to a seperate leaf when a substantial amount of
bits is used.

Thanks,

tglx

2016-10-17 15:04:18

by Piotr Luc

[permalink] [raw]
Subject: [PATCH v2] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.

AVX512_4VNNIW - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors.

The spec can be found in Intel Software Developer Manual (SDM) or in
Instruction Set Extensions Programming Reference (ISE).
The implementation is based on Table 2.8 "Information Returned by CPUID
Instruction" in ISE,
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

v2: Initialize new bits in the scattered group. Add

Signed-off-by: Piotr Luc <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/kernel/cpu/scattered.c | 2 ++
arch/x86/kernel/fpu/xstate.c | 2 ++
tools/arch/x86/include/asm/cpufeatures.h | 2 ++
4 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)

static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa..7dbd480 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,6 +73,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_PKU);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
}

/*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
--
2.10.1

2016-10-18 13:00:34

by Piotr Luc

[permalink] [raw]
Subject: Re: [v2] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.

On Mon, 2016-10-17 at 17:03 +0200, Piotr Luc wrote:
> v2: Initialize new bits in the scattered group. Add

The commit message is obviously broken. Sorry for that.
I will resend with fixed message.

Regards,
Piotr

2016-10-18 13:08:26

by Ingo Molnar

[permalink] [raw]
Subject: Re: [v2] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.


* Luc, Piotr <[email protected]> wrote:

> On Mon, 2016-10-17 at 17:03 +0200, Piotr Luc wrote:
> > v2: Initialize new bits in the scattered group. Add
>
> The commit message is obviously broken. Sorry for that.
> I will resend with fixed message.

In the v3 patchlog please also please describe to what extent new instructions are
enabled by the patch when run on real hardware (or on a simulator).

I.e. can user-space run those new instructions, while it couldn't before - or is
the patch purely for /proc/cpuinfo enumeration?

I.e. a comprehensive before/after comparison.

Thanks,

Ingo

2016-10-18 15:01:36

by Piotr Luc

[permalink] [raw]
Subject: [PATCH v3] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.

AVX512_4VNNIW - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new
instructions are supported by a processor and can be used by programs.

The patch defines new feature flags to enumerate new instruction groups
in /proc/cpuinfo accordingly to CPUID bits. Because correct xsave setup
is required to use AVX512 instruction, the patch clears the new feature
flags in CPU caps to inform programs not to use the instructions if the
setup fails.

The spec can be found in Intel Software Developer Manual (SDM) or in
Instruction Set Extensions Programming Reference (ISE).
The implementation is based on Table 2.8 "Information Returned by CPUID
Instruction" in ISE,
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

v2: Initialize new bits in the scattered group. Add reference to correct
description of new feature bits.
v3: Fix v2 info. Add short info what the patch does.

Signed-off-by: Piotr Luc <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/kernel/cpu/scattered.c | 2 ++
arch/x86/kernel/fpu/xstate.c | 2 ++
tools/arch/x86/include/asm/cpufeatures.h | 2 ++
4 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)

static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa..7dbd480 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,6 +73,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_PKU);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
}

/*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
--
2.10.1

Subject: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

Commit-ID: 8214899342981dbd49ae24aadbbd19e9e7830684
Gitweb: http://git.kernel.org/tip/8214899342981dbd49ae24aadbbd19e9e7830684
Author: Piotr Luc <[email protected]>
AuthorDate: Tue, 18 Oct 2016 17:01:11 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 19 Oct 2016 17:37:13 +0200

x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

AVX512_4VNNIW - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

These new instructions are to be used in future Intel Xeon & Xeon Phi
processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new
instructions are supported by a processor.

The spec can be found in the Intel Software Developer Manual (SDM) or in
the Instruction Set Extensions Programming Reference (ISE).

Define new feature flags to enumerate the new instructions in /proc/cpuinfo
accordingly to CPUID bits and add the required xsave extensions which are
required for proper operation.

Signed-off-by: Piotr Luc <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/kernel/cpu/scattered.c | 2 ++
arch/x86/kernel/fpu/xstate.c | 2 ++
tools/arch/x86/include/asm/cpufeatures.h | 2 ++
4 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc8..a396292 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)

static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c..095ef7d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_PKU);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
}

/*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1188bc8..a396292 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */