2023-09-28 22:32:57

by Compostella, Jeremy

[permalink] [raw]
Subject: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On TME enabled platform, BIOS publishes MTRR taking into account Total
Memory Encryption (TME) reserved bits.

generic_get_mtrr() performs a sanity check of the MTRRs relying on the
`phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
`x86_phys_bits' field. But at the time the generic_get_mtrr()
function is ran the `x86_phys_bits' has not been updated by
detect_tme() when TME is enabled.

Since the x86_phys_bits does not reflect yet the real maximal physical
address size yet generic_get_mtrr() complains by logging the following
messages.

mtrr: your BIOS has configured an incorrect mask, fixing it.
mtrr: your BIOS has configured an incorrect mask, fixing it.
[...]

In such a situation, generic_get_mtrr() returns an incorrect size but
no side effect were observed during our testing.

For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
move the detect_tme() call from init_intel() to early_init_intel().

Change for v3:
Take review commit message comments into account.

Change for v2:
Add a comment in the code explaining why detect_tme() needs to called
in early_init_intel().

Signed-off-by: Jeremy Compostella <[email protected]>
---
arch/x86/kernel/cpu/intel.c | 178 ++++++++++++++++++------------------
1 file changed, 91 insertions(+), 87 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index be4045628fd3..17240b96ffda 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
return false;
}

+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -335,6 +419,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (detect_extended_topology_early(c) < 0)
detect_ht_early(c);
+
+ /*
+ * detect_tme() must be called early as it updates the number of
+ * physical address bits which is used during the MTRR sanity check.
+ */
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme(c);
}

static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -495,90 +586,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}

-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -715,9 +722,6 @@ static void init_intel(struct cpuinfo_x86 *c)

init_ia32_feat_ctl(c);

- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);

split_lock_init();
--
2.40.1


2023-09-29 09:18:46

by Huang, Kai

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> On TME enabled platform, BIOS publishes MTRR taking into account Total
> Memory Encryption (TME) reserved bits.
>
> generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> `x86_phys_bits' field. But at the time the generic_get_mtrr()
> function is ran the `x86_phys_bits' has not been updated by
> detect_tme() when TME is enabled.
>
> Since the x86_phys_bits does not reflect yet the real maximal physical
> address size yet generic_get_mtrr() complains by logging the following
> messages.
>
> mtrr: your BIOS has configured an incorrect mask, fixing it.
> mtrr: your BIOS has configured an incorrect mask, fixing it.
> [...]
>
> In such a situation, generic_get_mtrr() returns an incorrect size but
> no side effect were observed during our testing.
>
> For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> move the detect_tme() call from init_intel() to early_init_intel().

Hi,

This move looks good to me, but +Kirill who is the author of detect_tme() for
further comments.

Also I am not sure whether it's worth to consider to move this to
get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
Thus it seems anything that can impact physical address size could be put there.

Perhaps can be done in the future if ever needed.

>
> Change for v3:
> Take review commit message comments into account.
>
> Change for v2:
> Add a comment in the code explaining why detect_tme() needs to called
> in early_init_intel().

You can put version history to

<Your SoB>
---
version history
---
...

then it will be stripped away when the patch is applied.

Also, for related multiple patches please use --cover-letter --thread to make it
a series. You can also use --base=auto to include the base commit id to the
patch.

2023-10-02 22:48:31

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
> On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> > On TME enabled platform, BIOS publishes MTRR taking into account Total
> > Memory Encryption (TME) reserved bits.
> >
> > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> > `x86_phys_bits' field. But at the time the generic_get_mtrr()
> > function is ran the `x86_phys_bits' has not been updated by
> > detect_tme() when TME is enabled.
> >
> > Since the x86_phys_bits does not reflect yet the real maximal physical
> > address size yet generic_get_mtrr() complains by logging the following
> > messages.
> >
> > mtrr: your BIOS has configured an incorrect mask, fixing it.
> > mtrr: your BIOS has configured an incorrect mask, fixing it.
> > [...]
> >
> > In such a situation, generic_get_mtrr() returns an incorrect size but
> > no side effect were observed during our testing.
> >
> > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> > move the detect_tme() call from init_intel() to early_init_intel().
>
> Hi,
>
> This move looks good to me, but +Kirill who is the author of detect_tme() for
> further comments.
>
> Also I am not sure whether it's worth to consider to move this to
> get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
> Thus it seems anything that can impact physical address size could be put there.

Actually, I am not sure how this patch works. AFAICS after the patch we
have the following callchain:

early_identify_cpu()
this_cpu->c_early_init() (which is early_init_init())
detect_tme()
c->x86_phys_bits -= keyid_bits;
get_cpu_address_sizes(c);
c->x86_phys_bits = eax & 0xff;

Looks like get_cpu_address_sizes() would override what detect_tme() does.

I guess we reach the same detect_tme() again via c->c_init() (aka
init_intel()) codepath and get the value right again.

But it seems accidental.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-10-03 02:07:26

by Huang, Kai

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
> On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
> > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> > > On TME enabled platform, BIOS publishes MTRR taking into account Total
> > > Memory Encryption (TME) reserved bits.
> > >
> > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
> > > function is ran the `x86_phys_bits' has not been updated by
> > > detect_tme() when TME is enabled.
> > >
> > > Since the x86_phys_bits does not reflect yet the real maximal physical
> > > address size yet generic_get_mtrr() complains by logging the following
> > > messages.
> > >
> > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> > > [...]
> > >
> > > In such a situation, generic_get_mtrr() returns an incorrect size but
> > > no side effect were observed during our testing.
> > >
> > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> > > move the detect_tme() call from init_intel() to early_init_intel().
> >
> > Hi,
> >
> > This move looks good to me, but +Kirill who is the author of detect_tme() for
> > further comments.
> >
> > Also I am not sure whether it's worth to consider to move this to
> > get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
> > Thus it seems anything that can impact physical address size could be put there.
>
> Actually, I am not sure how this patch works. AFAICS after the patch we
> have the following callchain:
>
> early_identify_cpu()
> this_cpu->c_early_init() (which is early_init_init())
> detect_tme()
> c->x86_phys_bits -= keyid_bits;
> get_cpu_address_sizes(c);
> c->x86_phys_bits = eax & 0xff;
>
> Looks like get_cpu_address_sizes() would override what detect_tme() does.

After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
then calls c_early_init(), which calls detect_tme().

So looks no override. No?

>
> I guess we reach the same detect_tme() again via c->c_init() (aka
> init_intel()) codepath and get the value right again.
>
> But it seems accidental.
>

After this patch:

identify_cpu() ->
generic_identify() ->
get_cpu_address_sizes() <----- (1)
this_cpu->c_init() ->
early_init_intel() ->
detect_tme() <----- (2)

(1) resets x86_phys_bits [*], but (2) is called rather soon so nothing wrong is
done between them for now.

But there's a window between (1) and (2) (and similarly in early_identify_cpu()
too). Things can get wrong if people are careless, thus I said _perhaps_ it's
worth to consider to move detect_tme() to get_cpu_address_sizes(). But I don't
know.

[*]: on the other hand, (1) is necessary to make (2) right.

2023-10-03 07:07:11

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
> > > > Memory Encryption (TME) reserved bits.
> > > >
> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
> > > > function is ran the `x86_phys_bits' has not been updated by
> > > > detect_tme() when TME is enabled.
> > > >
> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
> > > > address size yet generic_get_mtrr() complains by logging the following
> > > > messages.
> > > >
> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> > > > [...]
> > > >
> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
> > > > no side effect were observed during our testing.
> > > >
> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> > > > move the detect_tme() call from init_intel() to early_init_intel().
> > >
> > > Hi,
> > >
> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
> > > further comments.
> > >
> > > Also I am not sure whether it's worth to consider to move this to
> > > get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
> > > Thus it seems anything that can impact physical address size could be put there.
> >
> > Actually, I am not sure how this patch works. AFAICS after the patch we
> > have the following callchain:
> >
> > early_identify_cpu()
> > this_cpu->c_early_init() (which is early_init_init())
> > detect_tme()
> > c->x86_phys_bits -= keyid_bits;
> > get_cpu_address_sizes(c);
> > c->x86_phys_bits = eax & 0xff;
> >
> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
>
> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
> then calls c_early_init(), which calls detect_tme().
>
> So looks no override. No?

We identify CPU twice: once via early_cpu_init() and the second time via
identify_boot_cpu()/identify_secondary_cpu(). I am talking about
early_cpu_init() codepath.

It might not matter in practice as of now, because it will get straight
later, but CPU ident code is mess as it is. Let's not make it even worse.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-10-13 23:03:15

by Compostella, Jeremy

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

"[email protected]" <[email protected]> writes:
> On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
>> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
>> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
>> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
>> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
>> > > > Memory Encryption (TME) reserved bits.
>> > > >
>> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
>> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
>> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
>> > > > function is ran the `x86_phys_bits' has not been updated by
>> > > > detect_tme() when TME is enabled.
>> > > >
>> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
>> > > > address size yet generic_get_mtrr() complains by logging the following
>> > > > messages.
>> > > >
>> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
>> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
>> > > > [...]
>> > > >
>> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
>> > > > no side effect were observed during our testing.
>> > > >
>> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
>> > > > move the detect_tme() call from init_intel() to early_init_intel().
>> > >
>> > > Hi,
>> > >
>> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
>> > > further comments.
>> > >
>> > > Also I am not sure whether it's worth to consider to move this to
>> > > get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
>> > > Thus it seems anything that can impact physical address size could be put there.
>> >
>> > Actually, I am not sure how this patch works. AFAICS after the patch we
>> > have the following callchain:
>> >
>> > early_identify_cpu()
>> > this_cpu->c_early_init() (which is early_init_init())
>> > detect_tme()
>> > c->x86_phys_bits -= keyid_bits;
>> > get_cpu_address_sizes(c);
>> > c->x86_phys_bits = eax & 0xff;
>> >
>> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
>>
>> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
>> then calls c_early_init(), which calls detect_tme().
>>
>> So looks no override. No?

No override indeed as get_cpu_address_sizes() is always called before
early_init_intel or init_intel().

- init/main.c::start_kernel()
- arch/x86/kernel/setup.c::setup_arch()
- arch/x86/kernel/cpu/common.c::early_cpu_init()
- early_identify_cpu()
- get_cpu_address_sizes(c)
c->x86_phys_bits = eax & 0xff;
- arch/x86/kernel/cpu/intel.c::early_init_intel()
- detect_tme()
c->x86_phys_bits -= keyid_bits;
- arch/x86/kernel/cpu/common.c::arch_cpu_finalize_init()
- identify_boot_cpu()
- identify_cpu()
- get_cpu_address_sizes(c)
c->x86_phys_bits = eax & 0xff;
- arch/x86/kernel/cpu/intel.c::init_intel()
- early_init_intel()
- detect_tme()
c->x86_phys_bits -= keyid_bits;

> We identify CPU twice: once via early_cpu_init() and the second time via
> identify_boot_cpu()/identify_secondary_cpu(). I am talking about
> early_cpu_init() codepath.
>
> It might not matter in practice as of now, because it will get straight
> later, but CPU ident code is mess as it is. Let's not make it even worse.

This change is not modifying the CPU indent code, this is just
re-ordering detect_tme() call in the intel specifics hook so that the
information is available earlier as it is needed by
generic_get_mtrr(). This is similar to what is done in
arch/x86/kernel/cpu/amd.c.


Attachments:
(No filename) (3.90 kB)

2023-10-14 21:02:04

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Fri, Oct 13, 2023 at 04:03:02PM -0700, Compostella, Jeremy wrote:
> "[email protected]" <[email protected]> writes:
> > On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
> >> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
> >> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
> >> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> >> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
> >> > > > Memory Encryption (TME) reserved bits.
> >> > > >
> >> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> >> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> >> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
> >> > > > function is ran the `x86_phys_bits' has not been updated by
> >> > > > detect_tme() when TME is enabled.
> >> > > >
> >> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
> >> > > > address size yet generic_get_mtrr() complains by logging the following
> >> > > > messages.
> >> > > >
> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> >> > > > [...]
> >> > > >
> >> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
> >> > > > no side effect were observed during our testing.
> >> > > >
> >> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> >> > > > move the detect_tme() call from init_intel() to early_init_intel().
> >> > >
> >> > > Hi,
> >> > >
> >> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
> >> > > further comments.
> >> > >
> >> > > Also I am not sure whether it's worth to consider to move this to
> >> > > get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
> >> > > Thus it seems anything that can impact physical address size could be put there.
> >> >
> >> > Actually, I am not sure how this patch works. AFAICS after the patch we
> >> > have the following callchain:
> >> >
> >> > early_identify_cpu()
> >> > this_cpu->c_early_init() (which is early_init_init())
> >> > detect_tme()
> >> > c->x86_phys_bits -= keyid_bits;
> >> > get_cpu_address_sizes(c);
> >> > c->x86_phys_bits = eax & 0xff;
> >> >
> >> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
> >>
> >> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
> >> then calls c_early_init(), which calls detect_tme().
> >>
> >> So looks no override. No?
>
> No override indeed as get_cpu_address_sizes() is always called before
> early_init_intel or init_intel().
>
> - init/main.c::start_kernel()
> - arch/x86/kernel/setup.c::setup_arch()
> - arch/x86/kernel/cpu/common.c::early_cpu_init()
> - early_identify_cpu()
> - get_cpu_address_sizes(c)
> c->x86_phys_bits = eax & 0xff;
> - arch/x86/kernel/cpu/intel.c::early_init_intel()
> - detect_tme()
> c->x86_phys_bits -= keyid_bits;

Hmm.. Do I read it wrong:

static void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
...
/* cyrix could have cpuid enabled via c_identify()*/
if (have_cpuid_p()) {
...
// Here we call early_intel_init()
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
...
}

get_cpu_address_sizes(c);
...
}

?

As far as I see get_cpu_address_sizes() called after early_intel_init().

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-10-16 16:16:15

by Compostella, Jeremy

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

<[email protected]> writes:

> On Fri, Oct 13, 2023 at 04:03:02PM -0700, Compostella, Jeremy wrote:
>> "[email protected]" <[email protected]> writes:
>> > On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
>> >> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
>> >> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
>> >> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
>> >> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
>> >> > > > Memory Encryption (TME) reserved bits.
>> >> > > >
>> >> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
>> >> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
>> >> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
>> >> > > > function is ran the `x86_phys_bits' has not been updated by
>> >> > > > detect_tme() when TME is enabled.
>> >> > > >
>> >> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
>> >> > > > address size yet generic_get_mtrr() complains by logging the following
>> >> > > > messages.
>> >> > > >
>> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
>> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
>> >> > > > [...]
>> >> > > >
>> >> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
>> >> > > > no side effect were observed during our testing.
>> >> > > >
>> >> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
>> >> > > > move the detect_tme() call from init_intel() to early_init_intel().
>> >> > >
>> >> > > Hi,
>> >> > >
>> >> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
>> >> > > further comments.
>> >> > >
>> >> > > Also I am not sure whether it's worth to consider to move this to
>> >> > > get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
>> >> > > Thus it seems anything that can impact physical address size
>> >> > > could be put there.
>> >> >
>> >> > Actually, I am not sure how this patch works. AFAICS after the patch we
>> >> > have the following callchain:
>> >> >
>> >> > early_identify_cpu()
>> >> > this_cpu->c_early_init() (which is early_init_init())
>> >> > detect_tme()
>> >> > c->x86_phys_bits -= keyid_bits;
>> >> > get_cpu_address_sizes(c);
>> >> > c->x86_phys_bits = eax & 0xff;
>> >> >
>> >> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
>> >>
>> >> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
>> >> then calls c_early_init(), which calls detect_tme().
>> >>
>> >> So looks no override. No?
>>
>> No override indeed as get_cpu_address_sizes() is always called before
>> early_init_intel or init_intel().
>>
>> - init/main.c::start_kernel()
>> - arch/x86/kernel/setup.c::setup_arch()
>> - arch/x86/kernel/cpu/common.c::early_cpu_init()
>> - early_identify_cpu()
>> - get_cpu_address_sizes(c)
>> c->x86_phys_bits = eax & 0xff;
>> - arch/x86/kernel/cpu/intel.c::early_init_intel()
>> - detect_tme()
>> c->x86_phys_bits -= keyid_bits;
>
> Hmm.. Do I read it wrong:
>
> static void __init early_identify_cpu(struct cpuinfo_x86 *c)
> {
> ...
> /* cyrix could have cpuid enabled via c_identify()*/
> if (have_cpuid_p()) {
> ...
> // Here we call early_intel_init()
> if (this_cpu->c_early_init)
> this_cpu->c_early_init(c);
> ...
> }
>
> get_cpu_address_sizes(c);
> ...
> }
>
> ?
>
> As far as I see get_cpu_address_sizes() called after early_intel_init().

On `58720809f527 v6.6-rc6 6.6-rc6 2de3c93ef41b' is what I have:

,----
| 1599 /* cyrix could have cpuid enabled via c_identify()*/
| 1600 if (have_cpuid_p()) {
| 1601 cpu_detect(c);
| 1602 get_cpu_vendor(c);
| 1603 get_cpu_cap(c);
| 1604 get_cpu_address_sizes(c); <= called first
| 1605 setup_force_cpu_cap(X86_FEATURE_CPUID);
| 1606 cpu_parse_early_param();
| 1607
| 1608 if (this_cpu->c_early_init)
| 1609 this_cpu->c_early_init(c);
| 1610
| 1611 c->cpu_index = 0;
| 1612 filter_cpuid_features(c, false);
| 1613
| 1614 if (this_cpu->c_bsp_init)
| 1615 this_cpu->c_bsp_init(c);
| 1616 } else {
| 1617 setup_clear_cpu_cap(X86_FEATURE_CPUID);
| 1618 }
`----
Listing 1: arch/x86/kernel/cpu/common.c

=> get_cpu_address_sizes() is called first which is also conform to my
experiments and instrumentation.

--
Jeremy
One Emacs to rule them all


Attachments:
(No filename) (4.66 kB)

2023-10-16 16:27:29

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Mon, Oct 16, 2023 at 09:14:35AM -0700, Compostella, Jeremy wrote:
> <[email protected]> writes:
>
> > On Fri, Oct 13, 2023 at 04:03:02PM -0700, Compostella, Jeremy wrote:
> >> "[email protected]" <[email protected]> writes:
> >> > On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
> >> >> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
> >> >> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
> >> >> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> >> >> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
> >> >> > > > Memory Encryption (TME) reserved bits.
> >> >> > > >
> >> >> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> >> >> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> >> >> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
> >> >> > > > function is ran the `x86_phys_bits' has not been updated by
> >> >> > > > detect_tme() when TME is enabled.
> >> >> > > >
> >> >> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
> >> >> > > > address size yet generic_get_mtrr() complains by logging the following
> >> >> > > > messages.
> >> >> > > >
> >> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> >> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> >> >> > > > [...]
> >> >> > > >
> >> >> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
> >> >> > > > no side effect were observed during our testing.
> >> >> > > >
> >> >> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> >> >> > > > move the detect_tme() call from init_intel() to early_init_intel().
> >> >> > >
> >> >> > > Hi,
> >> >> > >
> >> >> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
> >> >> > > further comments.
> >> >> > >
> >> >> > > Also I am not sure whether it's worth to consider to move this to
> >> >> > > get_cpu_address_sizes(), which calculates the virtual/physical address sizes.
> >> >> > > Thus it seems anything that can impact physical address size
> >> >> > > could be put there.
> >> >> >
> >> >> > Actually, I am not sure how this patch works. AFAICS after the patch we
> >> >> > have the following callchain:
> >> >> >
> >> >> > early_identify_cpu()
> >> >> > this_cpu->c_early_init() (which is early_init_init())
> >> >> > detect_tme()
> >> >> > c->x86_phys_bits -= keyid_bits;
> >> >> > get_cpu_address_sizes(c);
> >> >> > c->x86_phys_bits = eax & 0xff;
> >> >> >
> >> >> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
> >> >>
> >> >> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
> >> >> then calls c_early_init(), which calls detect_tme().
> >> >>
> >> >> So looks no override. No?
> >>
> >> No override indeed as get_cpu_address_sizes() is always called before
> >> early_init_intel or init_intel().
> >>
> >> - init/main.c::start_kernel()
> >> - arch/x86/kernel/setup.c::setup_arch()
> >> - arch/x86/kernel/cpu/common.c::early_cpu_init()
> >> - early_identify_cpu()
> >> - get_cpu_address_sizes(c)
> >> c->x86_phys_bits = eax & 0xff;
> >> - arch/x86/kernel/cpu/intel.c::early_init_intel()
> >> - detect_tme()
> >> c->x86_phys_bits -= keyid_bits;
> >
> > Hmm.. Do I read it wrong:
> >
> > static void __init early_identify_cpu(struct cpuinfo_x86 *c)
> > {
> > ...
> > /* cyrix could have cpuid enabled via c_identify()*/
> > if (have_cpuid_p()) {
> > ...
> > // Here we call early_intel_init()
> > if (this_cpu->c_early_init)
> > this_cpu->c_early_init(c);
> > ...
> > }
> >
> > get_cpu_address_sizes(c);
> > ...
> > }
> >
> > ?
> >
> > As far as I see get_cpu_address_sizes() called after early_intel_init().
>
> On `58720809f527 v6.6-rc6 6.6-rc6 2de3c93ef41b' is what I have:
>
> ,----
> | 1599 /* cyrix could have cpuid enabled via c_identify()*/
> | 1600 if (have_cpuid_p()) {
> | 1601 cpu_detect(c);
> | 1602 get_cpu_vendor(c);
> | 1603 get_cpu_cap(c);
> | 1604 get_cpu_address_sizes(c); <= called first
> | 1605 setup_force_cpu_cap(X86_FEATURE_CPUID);
> | 1606 cpu_parse_early_param();
> | 1607
> | 1608 if (this_cpu->c_early_init)
> | 1609 this_cpu->c_early_init(c);
> | 1610
> | 1611 c->cpu_index = 0;
> | 1612 filter_cpuid_features(c, false);
> | 1613
> | 1614 if (this_cpu->c_bsp_init)
> | 1615 this_cpu->c_bsp_init(c);
> | 1616 } else {
> | 1617 setup_clear_cpu_cap(X86_FEATURE_CPUID);
> | 1618 }
> `----
> Listing 1: arch/x86/kernel/cpu/common.c
>
> => get_cpu_address_sizes() is called first which is also conform to my
> experiments and instrumentation.

Ah. It got patched in tip tree. See commit fbf6449f84bf.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-10-16 17:03:20

by Compostella, Jeremy

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

#+begin_signature
--
Jeremy
One Emacs to rule them all
#+end_signature<[email protected]> writes:

> On Mon, Oct 16, 2023 at 09:14:35AM -0700, Compostella, Jeremy wrote:
>> <[email protected]> writes:
>>
>> > On Fri, Oct 13, 2023 at 04:03:02PM -0700, Compostella, Jeremy wrote:
>> >> "[email protected]" <[email protected]> writes:
>> >> > On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
>> >> >> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
>> >> >> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
>> >> >> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
>> >> >> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
>> >> >> > > > Memory Encryption (TME) reserved bits.
>> >> >> > > >
>> >> >> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
>> >> >> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
>> >> >> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
>> >> >> > > > function is ran the `x86_phys_bits' has not been updated by
>> >> >> > > > detect_tme() when TME is enabled.
>> >> >> > > >
>> >> >> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
>> >> >> > > > address size yet generic_get_mtrr() complains by logging the following
>> >> >> > > > messages.
>> >> >> > > >
>> >> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
>> >> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
>> >> >> > > > [...]
>> >> >> > > >
>> >> >> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
>> >> >> > > > no side effect were observed during our testing.
>> >> >> > > >
>> >> >> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
>> >> >> > > > move the detect_tme() call from init_intel() to early_init_intel().
>> >> >> > >
>> >> >> > > Hi,
>> >> >> > >
>> >> >> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
>> >> >> > > further comments.
>> >> >> > >
>> >> >> > > Also I am not sure whether it's worth to consider to move this to
>> >> >> > > get_cpu_address_sizes(), which calculates the
>> >> >> > > virtual/physical address sizes.
>> >> >> > > Thus it seems anything that can impact physical address size
>> >> >> > > could be put there.
>> >> >> >
>> >> >> > Actually, I am not sure how this patch works. AFAICS after the patch we
>> >> >> > have the following callchain:
>> >> >> >
>> >> >> > early_identify_cpu()
>> >> >> > this_cpu->c_early_init() (which is early_init_init())
>> >> >> > detect_tme()
>> >> >> > c->x86_phys_bits -= keyid_bits;
>> >> >> > get_cpu_address_sizes(c);
>> >> >> > c->x86_phys_bits = eax & 0xff;
>> >> >> >
>> >> >> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
>> >> >>
>> >> >> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
>> >> >> then calls c_early_init(), which calls detect_tme().
>> >> >>
>> >> >> So looks no override. No?
>> >>
>> >> No override indeed as get_cpu_address_sizes() is always called before
>> >> early_init_intel or init_intel().
>> >>
>> >> - init/main.c::start_kernel()
>> >> - arch/x86/kernel/setup.c::setup_arch()
>> >> - arch/x86/kernel/cpu/common.c::early_cpu_init()
>> >> - early_identify_cpu()
>> >> - get_cpu_address_sizes(c)
>> >> c->x86_phys_bits = eax & 0xff;
>> >> - arch/x86/kernel/cpu/intel.c::early_init_intel()
>> >> - detect_tme()
>> >> c->x86_phys_bits -= keyid_bits;
>> >
>> > Hmm.. Do I read it wrong:
>> >
>> > static void __init early_identify_cpu(struct cpuinfo_x86 *c)
>> > {
>> > ...
>> > /* cyrix could have cpuid enabled via c_identify()*/
>> > if (have_cpuid_p()) {
>> > ...
>> > // Here we call early_intel_init()
>> > if (this_cpu->c_early_init)
>> > this_cpu->c_early_init(c);
>> > ...
>> > }
>> >
>> > get_cpu_address_sizes(c);
>> > ...
>> > }
>> >
>> > ?
>> >
>> > As far as I see get_cpu_address_sizes() called after early_intel_init().
>>
>> On `58720809f527 v6.6-rc6 6.6-rc6 2de3c93ef41b' is what I have:
>>
>> ,----
>> | 1599 /* cyrix could have cpuid enabled via c_identify()*/
>> | 1600 if (have_cpuid_p()) {
>> | 1601 cpu_detect(c);
>> | 1602 get_cpu_vendor(c);
>> | 1603 get_cpu_cap(c);
>> | 1604 get_cpu_address_sizes(c); <= called first
>> | 1605 setup_force_cpu_cap(X86_FEATURE_CPUID);
>> | 1606 cpu_parse_early_param();
>> | 1607
>> | 1608 if (this_cpu->c_early_init)
>> | 1609 this_cpu->c_early_init(c);
>> | 1610
>> | 1611 c->cpu_index = 0;
>> | 1612 filter_cpuid_features(c, false);
>> | 1613
>> | 1614 if (this_cpu->c_bsp_init)
>> | 1615 this_cpu->c_bsp_init(c);
>> | 1616 } else {
>> | 1617 setup_clear_cpu_cap(X86_FEATURE_CPUID);
>> | 1618 }
>> `----
>> Listing 1: arch/x86/kernel/cpu/common.c
>>
>> => get_cpu_address_sizes() is called first which is also conform to my
>> experiments and instrumentation.
>
> Ah. It got patched in tip tree. See commit fbf6449f84bf.

This commit breaks AMD code as early_init_amd() calls
early_detect_mem_encrypt() to adjust x86_phys_bits which is not
initialized properly and then overwritten after.


Attachments:
(No filename) (5.40 kB)

2023-10-17 11:40:25

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/cpu/intel: Fix MTRR verification for TME enabled platforms

On Mon, Oct 16, 2023 at 10:00:06AM -0700, Compostella, Jeremy wrote:
> #+begin_signature
> --
> Jeremy
> One Emacs to rule them all
> #+end_signature<[email protected]> writes:
>
> > On Mon, Oct 16, 2023 at 09:14:35AM -0700, Compostella, Jeremy wrote:
> >> <[email protected]> writes:
> >>
> >> > On Fri, Oct 13, 2023 at 04:03:02PM -0700, Compostella, Jeremy wrote:
> >> >> "[email protected]" <[email protected]> writes:
> >> >> > On Tue, Oct 03, 2023 at 02:06:52AM +0000, Huang, Kai wrote:
> >> >> >> On Tue, 2023-10-03 at 01:47 +0300, [email protected] wrote:
> >> >> >> > On Fri, Sep 29, 2023 at 09:14:00AM +0000, Huang, Kai wrote:
> >> >> >> > > On Thu, 2023-09-28 at 15:30 -0700, Compostella, Jeremy wrote:
> >> >> >> > > > On TME enabled platform, BIOS publishes MTRR taking into account Total
> >> >> >> > > > Memory Encryption (TME) reserved bits.
> >> >> >> > > >
> >> >> >> > > > generic_get_mtrr() performs a sanity check of the MTRRs relying on the
> >> >> >> > > > `phys_hi_rsvd' variable which is set using the cpuinfo_x86 structure
> >> >> >> > > > `x86_phys_bits' field. But at the time the generic_get_mtrr()
> >> >> >> > > > function is ran the `x86_phys_bits' has not been updated by
> >> >> >> > > > detect_tme() when TME is enabled.
> >> >> >> > > >
> >> >> >> > > > Since the x86_phys_bits does not reflect yet the real maximal physical
> >> >> >> > > > address size yet generic_get_mtrr() complains by logging the following
> >> >> >> > > > messages.
> >> >> >> > > >
> >> >> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> >> >> >> > > > mtrr: your BIOS has configured an incorrect mask, fixing it.
> >> >> >> > > > [...]
> >> >> >> > > >
> >> >> >> > > > In such a situation, generic_get_mtrr() returns an incorrect size but
> >> >> >> > > > no side effect were observed during our testing.
> >> >> >> > > >
> >> >> >> > > > For `x86_phys_bits' to be updated before generic_get_mtrr() runs,
> >> >> >> > > > move the detect_tme() call from init_intel() to early_init_intel().
> >> >> >> > >
> >> >> >> > > Hi,
> >> >> >> > >
> >> >> >> > > This move looks good to me, but +Kirill who is the author of detect_tme() for
> >> >> >> > > further comments.
> >> >> >> > >
> >> >> >> > > Also I am not sure whether it's worth to consider to move this to
> >> >> >> > > get_cpu_address_sizes(), which calculates the
> >> >> >> > > virtual/physical address sizes.
> >> >> >> > > Thus it seems anything that can impact physical address size
> >> >> >> > > could be put there.
> >> >> >> >
> >> >> >> > Actually, I am not sure how this patch works. AFAICS after the patch we
> >> >> >> > have the following callchain:
> >> >> >> >
> >> >> >> > early_identify_cpu()
> >> >> >> > this_cpu->c_early_init() (which is early_init_init())
> >> >> >> > detect_tme()
> >> >> >> > c->x86_phys_bits -= keyid_bits;
> >> >> >> > get_cpu_address_sizes(c);
> >> >> >> > c->x86_phys_bits = eax & 0xff;
> >> >> >> >
> >> >> >> > Looks like get_cpu_address_sizes() would override what detect_tme() does.
> >> >> >>
> >> >> >> After this patch, early_identify_cpu() calls get_cpu_address_sizes() first and
> >> >> >> then calls c_early_init(), which calls detect_tme().
> >> >> >>
> >> >> >> So looks no override. No?
> >> >>
> >> >> No override indeed as get_cpu_address_sizes() is always called before
> >> >> early_init_intel or init_intel().
> >> >>
> >> >> - init/main.c::start_kernel()
> >> >> - arch/x86/kernel/setup.c::setup_arch()
> >> >> - arch/x86/kernel/cpu/common.c::early_cpu_init()
> >> >> - early_identify_cpu()
> >> >> - get_cpu_address_sizes(c)
> >> >> c->x86_phys_bits = eax & 0xff;
> >> >> - arch/x86/kernel/cpu/intel.c::early_init_intel()
> >> >> - detect_tme()
> >> >> c->x86_phys_bits -= keyid_bits;
> >> >
> >> > Hmm.. Do I read it wrong:
> >> >
> >> > static void __init early_identify_cpu(struct cpuinfo_x86 *c)
> >> > {
> >> > ...
> >> > /* cyrix could have cpuid enabled via c_identify()*/
> >> > if (have_cpuid_p()) {
> >> > ...
> >> > // Here we call early_intel_init()
> >> > if (this_cpu->c_early_init)
> >> > this_cpu->c_early_init(c);
> >> > ...
> >> > }
> >> >
> >> > get_cpu_address_sizes(c);
> >> > ...
> >> > }
> >> >
> >> > ?
> >> >
> >> > As far as I see get_cpu_address_sizes() called after early_intel_init().
> >>
> >> On `58720809f527 v6.6-rc6 6.6-rc6 2de3c93ef41b' is what I have:
> >>
> >> ,----
> >> | 1599 /* cyrix could have cpuid enabled via c_identify()*/
> >> | 1600 if (have_cpuid_p()) {
> >> | 1601 cpu_detect(c);
> >> | 1602 get_cpu_vendor(c);
> >> | 1603 get_cpu_cap(c);
> >> | 1604 get_cpu_address_sizes(c); <= called first
> >> | 1605 setup_force_cpu_cap(X86_FEATURE_CPUID);
> >> | 1606 cpu_parse_early_param();
> >> | 1607
> >> | 1608 if (this_cpu->c_early_init)
> >> | 1609 this_cpu->c_early_init(c);
> >> | 1610
> >> | 1611 c->cpu_index = 0;
> >> | 1612 filter_cpuid_features(c, false);
> >> | 1613
> >> | 1614 if (this_cpu->c_bsp_init)
> >> | 1615 this_cpu->c_bsp_init(c);
> >> | 1616 } else {
> >> | 1617 setup_clear_cpu_cap(X86_FEATURE_CPUID);
> >> | 1618 }
> >> `----
> >> Listing 1: arch/x86/kernel/cpu/common.c
> >>
> >> => get_cpu_address_sizes() is called first which is also conform to my
> >> experiments and instrumentation.
> >
> > Ah. It got patched in tip tree. See commit fbf6449f84bf.
>
> This commit breaks AMD code as early_init_amd() calls
> early_detect_mem_encrypt() to adjust x86_phys_bits which is not
> initialized properly and then overwritten after.

Adam, any comments?

--
Kiryl Shutsemau / Kirill A. Shutemov