2011-05-24 23:04:35

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 1/3] x86, intel: Output microcode revision

From: Andi Kleen <[email protected]>

I got a request to make it easier to determine the microcode update level
on Intel CPUs. This patch adds a new "cpu update" field to /proc/cpuinfo,
which I added at the end to minimize impact on parsers.

The update level is also outputed on fatal machine checks together
with the other CPUID model information.

I removed the respective code from the microcode update driver, it
just reads the field from cpu_data. Also when the microcode is updated
it fills in the new values too.

I had to add a memory barrier to native_cpuid to prevent it being
optimized away when the result is not used.

This turns out to clean up further code which already got this
information manually. This is done in followon patches.

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/include/asm/processor.h | 5 ++++-
arch/x86/kernel/cpu/intel.c | 10 ++++++++++
arch/x86/kernel/cpu/mcheck/mce.c | 5 +++--
arch/x86/kernel/cpu/proc.c | 6 +++++-
arch/x86/kernel/microcode_intel.c | 9 +++------
5 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4c25ab4..23b7e26 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -111,6 +111,8 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
#endif
+ /* CPU update signature */
+ u32 x86_cpu_update;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));

#define X86_VENDOR_INTEL 0
@@ -179,7 +181,8 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
- : "0" (*eax), "2" (*ecx));
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
}

static inline void load_cr3(pgd_t *pgdir)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1edf5ba..150623a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -364,6 +364,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)

early_init_intel(c);

+ /* Determine CPU update level */
+ if (c->x86 >= 6 && !(cpu_has(c, X86_FEATURE_IA64))) {
+ unsigned lo;
+
+ wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ /* The CPUID 1 fills in the MSR */
+ cpuid_eax(1);
+ rdmsr(MSR_IA32_UCODE_REV, lo, c->x86_cpu_update);
+ }
+
intel_workarounds(c);

/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff1ae9b..e93c41f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -220,8 +220,9 @@ static void print_mce(struct mce *m)
pr_cont("MISC %llx ", m->misc);

pr_cont("\n");
- pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
- m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+ pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x CPU-UPDATE %u\n",
+ m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
+ cpu_data(m->extcpu).x86_cpu_update);

/*
* Print out human-readable details about the MCE error,
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 62ac8cb..cefcc27 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -132,8 +132,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, " [%d]", i);
}
}
+ seq_printf(m, "\n");

- seq_printf(m, "\n\n");
+ if (c->x86_cpu_update)
+ seq_printf(m, "cpu update\t: %u\n", c->x86_cpu_update);
+
+ seq_printf(m, "\n");

return 0;
}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 1a1b606..bb1fec6 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -161,12 +161,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
csig->pf = 1 << ((val[1] >> 18) & 7);
}

- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- /* see notes above for revision 1.07. Apparent chip bug */
- sync_core();
- /* get the current revision from MSR 0x8B */
- rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
-
+ csig->rev = c->x86_cpu_update;
pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
cpu_num, csig->sig, csig->pf, csig->rev);

@@ -300,6 +295,7 @@ static int apply_microcode(int cpu)
struct ucode_cpu_info *uci;
unsigned int val[2];
int cpu_num;
+ struct cpuinfo_x86 *c = &cpu_data(cpu_num);

cpu_num = raw_smp_processor_id();
uci = ucode_cpu_info + cpu;
@@ -335,6 +331,7 @@ static int apply_microcode(int cpu)
(mc_intel->hdr.date >> 16) & 0xff);

uci->cpu_sig.rev = val[1];
+ c->x86_cpu_update = val[1];

return 0;
}
--
1.7.4.4


2011-05-24 23:04:34

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 2/3] x86, intel: Use cpu_update for Atom errata check

From: Andi Kleen <[email protected]>

Now that the cpu update level is available the Atom PSE errata
check can use it directly without reading the MSR again.

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/kernel/cpu/intel.c | 15 ++++-----------
1 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 150623a..9ae3782 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -55,17 +55,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
- u32 ucode, junk;
-
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- sync_core();
- rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
-
- if (ucode < 0x20e) {
- printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
- clear_cpu_cap(c, X86_FEATURE_PSE);
- }
+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+ c->x86_cpu_update < 0x20e) {
+ printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+ clear_cpu_cap(c, X86_FEATURE_PSE);
}

#ifdef CONFIG_X86_64
--
1.7.4.4

2011-05-24 23:04:24

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 3/3] coretemp: Get microcode revision from cpu_data

From: Andi Kleen <[email protected]>

Now that the ucode revision is available in cpu_data remove
the existing code in coretemp.c to query it manually. Read the ucode
revision from cpu_data instead

Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Andi Kleen <[email protected]>
---
drivers/hwmon/coretemp.c | 29 +++++------------------------
1 files changed, 5 insertions(+), 24 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 9577c43..aff3e2c 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -324,15 +324,6 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
}
}

-static void __devinit get_ucode_rev_on_cpu(void *edx)
-{
- u32 eax;
-
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- sync_core();
- rdmsr(MSR_IA32_UCODE_REV, eax, *(u32 *)edx);
-}
-
static int get_pkg_tjmax(unsigned int cpu, struct device *dev)
{
int err;
@@ -433,21 +424,11 @@ static int chk_ucode_version(struct platform_device *pdev)
* Readings might stop update when processor visited too deep sleep,
* fixed for stepping D0 (6EC).
*/
- if (c->x86_model == 0xe && c->x86_mask < 0xc) {
- /* check for microcode update */
- err = smp_call_function_single(pdev->id, get_ucode_rev_on_cpu,
- &edx, 1);
- if (err) {
- dev_err(&pdev->dev,
- "Cannot determine microcode revision of "
- "CPU#%u (%d)!\n", pdev->id, err);
- return -ENODEV;
- } else if (edx < 0x39) {
- dev_err(&pdev->dev,
- "Errata AE18 not fixed, update BIOS or "
- "microcode of the CPU!\n");
- return -ENODEV;
- }
+ if (c->x86_model == 0xe && c->x86_mask < 0xc && c->x86_cpu_update < 0x39) {
+ dev_err(&pdev->dev,
+ "Errata AE18 not fixed, update BIOS or "
+ "microcode of the CPU!\n");
+ return -ENODEV;
}
return 0;
}
--
1.7.4.4

2011-05-25 00:01:11

by Fenghua Yu

[permalink] [raw]
Subject: RE: [PATCH 3/3] coretemp: Get microcode revision from cpu_data

> Subject: [PATCH 3/3] coretemp: Get microcode revision from cpu_data
>
> From: Andi Kleen <[email protected]>
>
> Now that the ucode revision is available in cpu_data remove
> the existing code in coretemp.c to query it manually. Read the ucode
> revision from cpu_data instead
>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> drivers/hwmon/coretemp.c | 29 +++++------------------------
> 1 files changed, 5 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 9577c43..aff3e2c 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -324,15 +324,6 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32
> id, struct device *dev)
> }
> }
>
> -static void __devinit get_ucode_rev_on_cpu(void *edx)
> -{
> - u32 eax;
> -
> - wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> - sync_core();
> - rdmsr(MSR_IA32_UCODE_REV, eax, *(u32 *)edx);
> -}
> -
> static int get_pkg_tjmax(unsigned int cpu, struct device *dev)
> {
> int err;
> @@ -433,21 +424,11 @@ static int chk_ucode_version(struct
> platform_device *pdev)
> * Readings might stop update when processor visited too deep
> sleep,
> * fixed for stepping D0 (6EC).
> */
> - if (c->x86_model == 0xe && c->x86_mask < 0xc) {
> - /* check for microcode update */
> - err = smp_call_function_single(pdev->id,
> get_ucode_rev_on_cpu,
> - &edx, 1);
> - if (err) {
> - dev_err(&pdev->dev,
> - "Cannot determine microcode revision of "
> - "CPU#%u (%d)!\n", pdev->id, err);
> - return -ENODEV;
> - } else if (edx < 0x39) {
> - dev_err(&pdev->dev,
> - "Errata AE18 not fixed, update BIOS or "
> - "microcode of the CPU!\n");
> - return -ENODEV;
> - }
> + if (c->x86_model == 0xe && c->x86_mask < 0xc && c->x86_cpu_update
> < 0x39) {
> + dev_err(&pdev->dev,
> + "Errata AE18 not fixed, update BIOS or "
> + "microcode of the CPU!\n");
> + return -ENODEV;
> }
> return 0;
> }
> --
> 1.7.4.4

With this patch, the variable err and edx defined in chk_ucode_version() won't be used and should be removed to avoid compilation warnings.

Thanks.

-Fenghua

2011-05-25 00:39:53

by Fenghua Yu

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

>@@ -300,6 +295,7 @@ static int apply_microcode(int cpu)
> struct ucode_cpu_info *uci;
> unsigned int val[2];
> int cpu_num;
>+ struct cpuinfo_x86 *c = &cpu_data(cpu_num);

This is wrong. cpu_num is not initialized yet at this point. You need to assign c after cpu_num is initialized in the next statement.

>
> cpu_num = raw_smp_processor_id();
> uci = ucode_cpu_info + cpu;

Thanks.

-Fenghua

2011-05-25 00:48:01

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Tue, May 24, 2011 at 05:25:08PM -0700, Yu, Fenghua wrote:
> >@@ -300,6 +295,7 @@ static int apply_microcode(int cpu)
> > struct ucode_cpu_info *uci;
> > unsigned int val[2];
> > int cpu_num;
> >+ struct cpuinfo_x86 *c = &cpu_data(cpu_num);
>
> This is wrong. cpu_num is not initialized yet at this point. You need to assign c after cpu_num is initialized in the next statement.

Thanks. Stupid bug. I'll post an updated version.

-Andi

2011-05-25 06:55:26

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Andi Kleen <[email protected]> wrote:

> From: Andi Kleen <[email protected]>
>
> I got a request to make it easier to determine the microcode update level
> on Intel CPUs. This patch adds a new "cpu update" field to /proc/cpuinfo,
> which I added at the end to minimize impact on parsers.

Agreed, that is a good idea, adding this to cpuinfo makes sense.

Your patch is rather messy though:

> The update level is also outputed on fatal machine checks together
> with the other CPUID model information.
>
> I removed the respective code from the microcode update driver, it
> just reads the field from cpu_data. Also when the microcode is updated
> it fills in the new values too.
>
> I had to add a memory barrier to native_cpuid to prevent it being
> optimized away when the result is not used.
>
> This turns out to clean up further code which already got this
> information manually. This is done in followon patches.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/include/asm/processor.h | 5 ++++-
> arch/x86/kernel/cpu/intel.c | 10 ++++++++++
> arch/x86/kernel/cpu/mcheck/mce.c | 5 +++--
> arch/x86/kernel/cpu/proc.c | 6 +++++-
> arch/x86/kernel/microcode_intel.c | 9 +++------
> 5 files changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 4c25ab4..23b7e26 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -111,6 +111,8 @@ struct cpuinfo_x86 {
> /* Index into per_cpu list: */
> u16 cpu_index;
> #endif
> + /* CPU update signature */
> + u32 x86_cpu_update;

This should be cpu_microcode_version instead. We already know its x86 so the
x86_ prefix is superfluous. 'cpu_update' is also rather ambigious and does not
describe much.

> diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
> index 1edf5ba..150623a 100644
> --- a/arch/x86/kernel/cpu/intel.c
> +++ b/arch/x86/kernel/cpu/intel.c
> @@ -364,6 +364,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
>
> early_init_intel(c);
>
> + /* Determine CPU update level */
> + if (c->x86 >= 6 && !(cpu_has(c, X86_FEATURE_IA64))) {
> + unsigned lo;
> +
> + wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> + /* The CPUID 1 fills in the MSR */
> + cpuid_eax(1);
> + rdmsr(MSR_IA32_UCODE_REV, lo, c->x86_cpu_update);
> + }


So, during the course of developing this, did it occur to you that other x86
CPUs should fill in this information as well?

If yes, did it occur to you to do the obvious git log
arch/x86/kernel/microcode*.c command to figure out who else might be interested
in this, and add them to the Cc: instead of forcing the maintainer to do that?

Also, and this is a repeat pattern from you Andi, you've silently made code worse
while moving it. You've changed this:

> - wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> - /* see notes above for revision 1.07. Apparent chip bug */
> - sync_core();
> - /* get the current revision from MSR 0x8B */
> - rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
> -

To:

> + unsigned lo;
> +
> + wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> + /* The CPUID 1 fills in the MSR */
> + cpuid_eax(1);
> + rdmsr(MSR_IA32_UCODE_REV, lo, c->x86_cpu_update);

Firstly, name 'lo' appropriately - 'lo' in itself says nothing and keeps the
reader wondering about what recreational drugs the writer of this code was on.

Obviously Secondly you've removed the reference to this chip bug:

> - /* see notes above for revision 1.07. Apparent chip bug */

Why? If you think it's not a bug (but got documented meanwhile as the official
way of updating the microcode and reading out the version) then update the
comments in a *separate* patch, and update the *other* reference to it as well.

You are repeating the same mistakes again and again.

> intel_workarounds(c);
>
> /*
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index ff1ae9b..e93c41f 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -220,8 +220,9 @@ static void print_mce(struct mce *m)
> pr_cont("MISC %llx ", m->misc);
>
> pr_cont("\n");
> - pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
> - m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
> + pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x CPU-UPDATE %u\n",
> + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
> + cpu_data(m->extcpu).x86_cpu_update);

This text too should be microcode-version or so. Also, while at it please fix
that printk() to not shout at users needlessly.

> diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
> index 62ac8cb..cefcc27 100644
> --- a/arch/x86/kernel/cpu/proc.c
> +++ b/arch/x86/kernel/cpu/proc.c
> @@ -132,8 +132,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
> seq_printf(m, " [%d]", i);
> }
> }
> + seq_printf(m, "\n");
>
> - seq_printf(m, "\n\n");
> + if (c->x86_cpu_update)
> + seq_printf(m, "cpu update\t: %u\n", c->x86_cpu_update);
> +
> + seq_printf(m, "\n");

This too should say microcode version.

Also, please move the field to the logical place, next to "stepping:". The
argument about parsers is bogus - anyone parsing /proc/cpuinfo is not in a
fastpath, full stop.

Also, the above sequence is rather suboptimal to begin with - we can and only
want to execute a *single* seq_printf() there.

> --- a/arch/x86/kernel/microcode_intel.c
> +++ b/arch/x86/kernel/microcode_intel.c
> @@ -161,12 +161,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
> csig->pf = 1 << ((val[1] >> 18) & 7);
> }
>
> - wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> - /* see notes above for revision 1.07. Apparent chip bug */
> - sync_core();
> - /* get the current revision from MSR 0x8B */
> - rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
> -
> + csig->rev = c->x86_cpu_update;
> pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
> cpu_num, csig->sig, csig->pf, csig->rev);
>
>
> @@ -300,6 +295,7 @@ static int apply_microcode(int cpu)
> struct ucode_cpu_info *uci;
> unsigned int val[2];
> int cpu_num;
> + struct cpuinfo_x86 *c = &cpu_data(cpu_num);
>
> cpu_num = raw_smp_processor_id();
> uci = ucode_cpu_info + cpu;
> @@ -335,6 +331,7 @@ static int apply_microcode(int cpu)
> (mc_intel->hdr.date >> 16) & 0xff);
>
> uci->cpu_sig.rev = val[1];
> + c->x86_cpu_update = val[1];
>
> return 0;

Please factor out the reading of the microcode version - you have now created
two duplicated open-coded versions of it in cpu.c and microcode_intel.c, with
mismatching comments - not good.

Also, in this branch:

if (val[1] != mc_intel->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
cpu_num, mc_intel->hdr.rev);
return -1;
}

it would be nice to put a check:

WARN_ON_ONCE(val[1] != c->x86_cpu_update);

To make sure that our notion of the version is still in sync with what the
hardware's notion is. (it should be)

Thanks,

Ingo

2011-05-25 06:59:46

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86, intel: Use cpu_update for Atom errata check


* Andi Kleen <[email protected]> wrote:

> From: Andi Kleen <[email protected]>
>
> Now that the cpu update level is available the Atom PSE errata
> check can use it directly without reading the MSR again.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/kernel/cpu/intel.c | 15 ++++-----------

Ok, it's nice that you have split this one out.

Please also split out the MCE printk change you did in the first patch - even
if it's a oneliner we want the first patch to only include changes focused to
the primary purpose alone: the introduction of x86_cpu::microcode_version.

Also, please split the first patch into two other parts: a first one that
factors out the Intel microcode-version MSR function into a separate function,
and the second patch that introduces the x86_cpu::microcode_version field and
fills it in in the CPU detection code and keeps it updated in the microcode
driver.

Thanks,

Ingo

2011-05-25 07:06:14

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Andi Kleen <[email protected]> wrote:

> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -111,6 +111,8 @@ struct cpuinfo_x86 {
> /* Index into per_cpu list: */
> u16 cpu_index;
> #endif
> + /* CPU update signature */
> + u32 x86_cpu_update;

Btw,. there's one subtle thing here: it's possible to have *different*
microcode versions on different CPUs in te system.

This can happen if for example the BIOS somehow does not apply the right
microcode to all CPUs. It can also happen if physically different microcode
version CPUs are mixed. In theory people can mix steppings as well.

Would you be interested in adding a quick debugging check after all CPUs have
been brought online, and print some very visible boot warning message if
there's a mismatch between the steppings or microcode versions? Perhaps also
taint the kernel.

Having non-updated microcode is one thing, but having *mixed* versions can
introduce its own set of problems: there are workarounds that have to be
activated on all CPUs, otherwise the result may be undefined.

We did not check for this before, so this would be a separate patch as well.

This should be done in a generic way, so it should work with other x86 CPUs as
well.

Thanks,

Ingo

2011-05-25 08:00:48

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, May 25, 2011 at 08:54:51AM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > From: Andi Kleen <[email protected]>
> >
> > I got a request to make it easier to determine the microcode update level
> > on Intel CPUs. This patch adds a new "cpu update" field to /proc/cpuinfo,
> > which I added at the end to minimize impact on parsers.
>
> Agreed, that is a good idea, adding this to cpuinfo makes sense.

Frankly, I'm not even 100% persuaded this is needed. The coretemp.c
jump-through-hoops to get the ucode revision is maybe the only case that
warrants adding that field to /proc/cpuinfo.

>
> Your patch is rather messy though:
>
> > The update level is also outputed on fatal machine checks together
> > with the other CPUID model information.
> >
> > I removed the respective code from the microcode update driver, it
> > just reads the field from cpu_data. Also when the microcode is updated
> > it fills in the new values too.
> >
> > I had to add a memory barrier to native_cpuid to prevent it being
> > optimized away when the result is not used.
> >
> > This turns out to clean up further code which already got this
> > information manually. This is done in followon patches.
> >
> > Signed-off-by: Andi Kleen <[email protected]>
> > ---
> > arch/x86/include/asm/processor.h | 5 ++++-
> > arch/x86/kernel/cpu/intel.c | 10 ++++++++++
> > arch/x86/kernel/cpu/mcheck/mce.c | 5 +++--
> > arch/x86/kernel/cpu/proc.c | 6 +++++-
> > arch/x86/kernel/microcode_intel.c | 9 +++------
> > 5 files changed, 25 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> > index 4c25ab4..23b7e26 100644
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -111,6 +111,8 @@ struct cpuinfo_x86 {
> > /* Index into per_cpu list: */
> > u16 cpu_index;
> > #endif
> > + /* CPU update signature */
> > + u32 x86_cpu_update;
>
> This should be cpu_microcode_version instead. We already know its x86 so the
> x86_ prefix is superfluous. 'cpu_update' is also rather ambigious and does not
> describe much.

Or shorter: 'cpu_ucode_version'.

> > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
> > index 1edf5ba..150623a 100644
> > --- a/arch/x86/kernel/cpu/intel.c
> > +++ b/arch/x86/kernel/cpu/intel.c
> > @@ -364,6 +364,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
> >
> > early_init_intel(c);
> >
> > + /* Determine CPU update level */
> > + if (c->x86 >= 6 && !(cpu_has(c, X86_FEATURE_IA64))) {
> > + unsigned lo;
> > +
> > + wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> > + /* The CPUID 1 fills in the MSR */
> > + cpuid_eax(1);
> > + rdmsr(MSR_IA32_UCODE_REV, lo, c->x86_cpu_update);
> > + }
>
>
> So, during the course of developing this, did it occur to you that
> other x86 CPUs should fill in this information as well?

Nah, those other vendors don't matter.

> If yes, did it occur to you to do the obvious git log
> arch/x86/kernel/microcode*.c command to figure out who else might be
> interested in this, and add them to the Cc: instead of forcing the
> maintainer to do that?

Thanks Ingo.

Andi, please take a look at
<arch/x86/kernel/microcode_amd.c::collect_cpu_info_amd()> on how to find
out the ucode patch level on AMD.

[..]

> > diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> > index ff1ae9b..e93c41f 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> > @@ -220,8 +220,9 @@ static void print_mce(struct mce *m)
> > pr_cont("MISC %llx ", m->misc);
> >
> > pr_cont("\n");
> > - pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
> > - m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
> > + pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x CPU-UPDATE %u\n",
> > + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
> > + cpu_data(m->extcpu).x86_cpu_update);
>
> This text too should be microcode-version or so. Also, while at it please fix
> that printk() to not shout at users needlessly.

Right, and not "CPU-UPDATE" but "ucode ver:" or similar, which actually
says it is ucode.

> > diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
> > index 62ac8cb..cefcc27 100644
> > --- a/arch/x86/kernel/cpu/proc.c
> > +++ b/arch/x86/kernel/cpu/proc.c
> > @@ -132,8 +132,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
> > seq_printf(m, " [%d]", i);
> > }
> > }
> > + seq_printf(m, "\n");
> >
> > - seq_printf(m, "\n\n");
> > + if (c->x86_cpu_update)
> > + seq_printf(m, "cpu update\t: %u\n", c->x86_cpu_update);
> > +
> > + seq_printf(m, "\n");
>
> This too should say microcode version.

Yep.

> Also, please move the field to the logical place, next to "stepping:". The
> argument about parsers is bogus - anyone parsing /proc/cpuinfo is not in a
> fastpath, full stop.
>
> Also, the above sequence is rather suboptimal to begin with - we can and only
> want to execute a *single* seq_printf() there.
>
> > --- a/arch/x86/kernel/microcode_intel.c
> > +++ b/arch/x86/kernel/microcode_intel.c
> > @@ -161,12 +161,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
> > csig->pf = 1 << ((val[1] >> 18) & 7);
> > }
> >
> > - wrmsr(MSR_IA32_UCODE_REV, 0, 0);
> > - /* see notes above for revision 1.07. Apparent chip bug */
> > - sync_core();
> > - /* get the current revision from MSR 0x8B */
> > - rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
> > -
> > + csig->rev = c->x86_cpu_update;
> > pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
> > cpu_num, csig->sig, csig->pf, csig->rev);
> >
> >
> > @@ -300,6 +295,7 @@ static int apply_microcode(int cpu)
> > struct ucode_cpu_info *uci;
> > unsigned int val[2];
> > int cpu_num;
> > + struct cpuinfo_x86 *c = &cpu_data(cpu_num);
> >
> > cpu_num = raw_smp_processor_id();
> > uci = ucode_cpu_info + cpu;
> > @@ -335,6 +331,7 @@ static int apply_microcode(int cpu)
> > (mc_intel->hdr.date >> 16) & 0xff);
> >
> > uci->cpu_sig.rev = val[1];
> > + c->x86_cpu_update = val[1];
> >
> > return 0;
>
> Please factor out the reading of the microcode version - you have now created
> two duplicated open-coded versions of it in cpu.c and microcode_intel.c, with
> mismatching comments - not good.
>
> Also, in this branch:
>
> if (val[1] != mc_intel->hdr.rev) {
> pr_err("CPU%d update to revision 0x%x failed\n",
> cpu_num, mc_intel->hdr.rev);
> return -1;
> }
>
> it would be nice to put a check:
>
> WARN_ON_ONCE(val[1] != c->x86_cpu_update);
>
> To make sure that our notion of the version is still in sync with what the
> hardware's notion is. (it should be)

Thanks.

--
Regards/Gruss,
Boris.

2011-05-25 09:05:23

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Borislav Petkov <[email protected]> wrote:

> On Wed, May 25, 2011 at 08:54:51AM +0200, Ingo Molnar wrote:
> >
> > * Andi Kleen <[email protected]> wrote:
> >
> > > From: Andi Kleen <[email protected]>
> > >
> > > I got a request to make it easier to determine the microcode update level
> > > on Intel CPUs. This patch adds a new "cpu update" field to /proc/cpuinfo,
> > > which I added at the end to minimize impact on parsers.
> >
> > Agreed, that is a good idea, adding this to cpuinfo makes sense.
>
> Frankly, I'm not even 100% persuaded this is needed. The coretemp.c
> jump-through-hoops to get the ucode revision is maybe the only case
> that warrants adding that field to /proc/cpuinfo.

I've often wondered whether the CPU involved in a particular
bugreport has the latest microcode installed.

Sure we have /sys/devices/system/cpu/cpuN/microcode/version, but
that's both privileged to get and also has to be asked for
separately.

Arguably the microcode version is a natural extension to the existing
family/model/stepping sequence:

cpu family : 6
model : 26
stepping : 5

We'd now see:

cpu family : 6
model : 26
stepping : 5
ucode_version : 17

Where 'stepping' is a hardware revison number and 'ucode_version' is
a dual software/hw revision number.

> > > @@ -111,6 +111,8 @@ struct cpuinfo_x86 {
> > > /* Index into per_cpu list: */
> > > u16 cpu_index;
> > > #endif
> > > + /* CPU update signature */
> > > + u32 x86_cpu_update;
> >
> > This should be cpu_microcode_version instead. We already know its x86 so the
> > x86_ prefix is superfluous. 'cpu_update' is also rather ambigious and does not
> > describe much.
>
> Or shorter: 'cpu_ucode_version'.

We already know it's a cpu data structure, since it's called 'struct
cpuinfo_x86' and the local variable is named 'c' which is the typical
shortcut for that data structure.

so c->ucode_version is the right name here.

Thanks,

Ingo

2011-05-25 10:51:01

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, May 25, 2011 at 11:05:01AM +0200, Ingo Molnar wrote:
> > Frankly, I'm not even 100% persuaded this is needed. The coretemp.c
> > jump-through-hoops to get the ucode revision is maybe the only case
> > that warrants adding that field to /proc/cpuinfo.

> I've often wondered whether the CPU involved in a particular bugreport
> has the latest microcode installed.

Ok, that's a good point, actually.

> Sure we have /sys/devices/system/cpu/cpuN/microcode/version, but
> that's both privileged to get

Not only that but you have to load the ucode driver to be able to read
it. /proc/cpuinfo looks like the easiest and most generic place.

> and also has to be asked for separately.

yes.

>
> Arguably the microcode version is a natural extension to the existing
> family/model/stepping sequence:
>
> cpu family : 6
> model : 26
> stepping : 5
>
> We'd now see:
>
> cpu family : 6
> model : 26
> stepping : 5
> ucode_version : 17
>
> Where 'stepping' is a hardware revison number and 'ucode_version' is
> a dual software/hw revision number.

Right.

Btw, can we dump the ucode version in hex since ours are much easier to
read that way:

[86483.770976] microcode: CPU0: patch_level=0x010000c4
[86483.826987] microcode: CPU1: patch_level=0x010000c4
[86483.835071] microcode: CPU2: patch_level=0x010000c4
...

I guess for Intel the ucode version format won't matter that much.

> > > > @@ -111,6 +111,8 @@ struct cpuinfo_x86 {
> > > > /* Index into per_cpu list: */
> > > > u16 cpu_index;
> > > > #endif
> > > > + /* CPU update signature */
> > > > + u32 x86_cpu_update;
> > >
> > > This should be cpu_microcode_version instead. We already know its x86 so the
> > > x86_ prefix is superfluous. 'cpu_update' is also rather ambigious and does not
> > > describe much.
> >
> > Or shorter: 'cpu_ucode_version'.
>
> We already know it's a cpu data structure, since it's called 'struct
> cpuinfo_x86' and the local variable is named 'c' which is the typical
> shortcut for that data structure.
>
> so c->ucode_version is the right name here.

even better.

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-25 11:29:20

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Borislav Petkov <[email protected]> wrote:

> Btw, can we dump the ucode version in hex since ours are much easier to
> read that way:
>
> [86483.770976] microcode: CPU0: patch_level=0x010000c4
> [86483.826987] microcode: CPU1: patch_level=0x010000c4
> [86483.835071] microcode: CPU2: patch_level=0x010000c4
> ...

How is that version constructed and iterated, or example is the
0x01000000 bit always set?

If it's always set then it might make sense to turn this into a more
human-readable version number: mask out the 0x01000000 and report
0xc4 as 194? Or is the *real* version above just '4'?

Should 0x010000c4 perhaps be printed as 1.10.4?

> I guess for Intel the ucode version format won't matter that much.

Well, if Intel does similar encodings as AMD, then it would be nice
to turn that into human-readable version strings as well.

Thanks,

Ingo

2011-05-25 11:30:47

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Borislav Petkov <[email protected]> wrote:

> > Sure we have /sys/devices/system/cpu/cpuN/microcode/version, but
> > that's both privileged to get
>
> Not only that but you have to load the ucode driver to be able to
> read it. /proc/cpuinfo looks like the easiest and most generic
> place.

Indeed, that's a good point - i sometimes boot kernels without the
microcode driver built in, so then no microcode gets loaded. If i
report a weird bug then whoever looks at my bug report might notice
the ancient ucode version number in /proc/cpuinfo.

Thanks,

Ingo

Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, 25 May 2011, Ingo Molnar wrote:
> * Andi Kleen <[email protected]> wrote:
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -111,6 +111,8 @@ struct cpuinfo_x86 {
> > /* Index into per_cpu list: */
> > u16 cpu_index;
> > #endif
> > + /* CPU update signature */
> > + u32 x86_cpu_update;
>
> Btw,. there's one subtle thing here: it's possible to have *different*
> microcode versions on different CPUs in te system.
>
> This can happen if for example the BIOS somehow does not apply the right
> microcode to all CPUs. It can also happen if physically different microcode
> version CPUs are mixed. In theory people can mix steppings as well.

In PRACTICE people WILL mix steppings as well.

I have some boxes like that, here. It is especifically supported by
server vendors, BIOS vendors, and also by Intel. And it happens easily
when people decide to add processors to a non-maxed-out box a few years
after it has been acquired.

Heck, I am pretty sure at least one of those boxes with mismatched
steppings we have here was SOLD to us by IBM Brazil already with
mismatched steppings installed.

So, please consider this a field report that mixed CPU steppings in X86
SMP boxes are somewhat common.

> been brought online, and print some very visible boot warning message if
> there's a mismatch between the steppings or microcode versions? Perhaps also
> taint the kernel.

Mismatched microcode within processors with the same processor
identification (the full one, as used by the microcode matching logic)
would be something to *fix* in the first place (we should be updating CPU
microcode before we bring the CPU online anyway) and it would be nice to
warn if we don't have the suitable microcode and the BIOS screwed it up,
indeed...

But mismatched stepping? We cannot taint or warn on mismatched stepping,
it is an explicitly supported configuration by the hardware and firmware
vendors.

BTW, microcode versions, at least for Intel, do not match across different
steppings (i.e. different processor signatures). They do not always match
even across CPUs with the same processor signature but different processor
flags! In fact, it is RARE when they do match.

--
"One disk to rule them all, One disk to find them. One disk to bring
them all and in the darkness grind them. In the Land of Redmond
where the shadows lie." -- The Silicon Valley Tarot
Henrique Holschuh

2011-05-25 16:55:23

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, May 25, 2011 at 08:54:51AM +0200, Ingo Molnar wrote:
> > + /* CPU update signature */
> > + u32 x86_cpu_update;
>
> This should be cpu_microcode_version instead. We already know its x86 so the
> x86_ prefix is superfluous. 'cpu_update' is also rather ambigious and does not
I followed the convention of the other fields in cpu_data, but I'll drop
the prefix.

"cpu update" was the name that was suggested to me. Sorry if it's a
bit vague. Apparently calling it microcode is not politically correct.
I'll keep it for now. If you want to really change it please send
another email.

>
> So, during the course of developing this, did it occur to you that other x86
> CPUs should fill in this information as well?

Yes, it did in fact, but I hope someone else will do that because I have no way to test it.


> > - /* see notes above for revision 1.07. Apparent chip bug */

This particular code pattern has no chip bug. The CPUID is required
by the documentation! So whoever wrote it didn't read the documentation.
So yes I dropped that obviously bogus comment.

>
> Why? If you think it's not a bug (but got documented meanwhile as the official
It always was documented this way.

> way of updating the microcode and reading out the version) then update the
> comments in a *separate* patch, and update the *other* reference to it as well.

I'm not sure about the other references. The documentation just
states the CPUID is needed for reading the revision.

Anyways I'll just leave the comment around for now.


> > +
> > + seq_printf(m, "\n");
>
> This too should say microcode version.
>
> Also, please move the field to the logical place, next to "stepping:". The
> argument about parsers is bogus - anyone parsing /proc/cpuinfo is not in a
> fastpath, full stop.

The rationale was not cycles, but if someone was stupid enough
to hardcode the line number offsets while parsing. You never know with user
space /proc parsers and assuming the worst is always the best.

I thought it was safest to add new fields at the end.

>
> Also, the above sequence is rather suboptimal to begin with - we can and only
> want to execute a *single* seq_printf() there.

Sorry I don't understand the comment. Anyways as you say above
it's no fast path, so it shouldn't matter.

> Please factor out the reading of the microcode version - you have now created
> two duplicated open-coded versions of it in cpu.c and microcode_intel.c, with
> mismatching comments - not good.

Huh? There's only a single one now.

> it would be nice to put a check:
>
> WARN_ON_ONCE(val[1] != c->x86_cpu_update);

Ok.

-Andi

2011-05-25 16:58:48

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

> > This can happen if for example the BIOS somehow does not apply the right
> > microcode to all CPUs. It can also happen if physically different microcode
> > version CPUs are mixed. In theory people can mix steppings as well.
>
> In PRACTICE people WILL mix steppings as well.

Yes exactly. I used to have a system with one P3 supporting FXSAVE
and other not :-) I had to fix the kernel back then to support this.
The system worked perfectly fine after the kernel was fixed.

So the check proposed is a bad idea. It would trigger
on real boxes which don't have any obvious problems.

-Andi

2011-05-25 18:24:33

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Andi Kleen <[email protected]> wrote:

> > > This can happen if for example the BIOS somehow does not apply the right
> > > microcode to all CPUs. It can also happen if physically different microcode
> > > version CPUs are mixed. In theory people can mix steppings as well.
> >
> > In PRACTICE people WILL mix steppings as well.
>
> Yes exactly. I used to have a system with one P3 supporting FXSAVE
> and other not :-) I had to fix the kernel back then to support this.
> The system worked perfectly fine after the kernel was fixed.

I'm not concerned about the cases where it works fine - i'm concerned
about cases where it's not fine.

Many years ago i had an old P3 system where i mixed steppings and it
was not stable, it would crash after i have put some load on the
system.

> So the check proposed is a bad idea. It would trigger on real boxes
> which don't have any obvious problems.

You are quite naive about mixed stepping i have to say - mixing
steppings was always a risk and can come with various problems:

http://mysite.verizon.net/pchardwarelinks/mixed.htm

Mixing processor steppings (revisions) in a dual environment doesn't
always work as well as it should. As a rule of thumb, one stepping
difference between two CPUs is generally acceptable. Two steppings is
pushing it. Note that some revisions of some operating systems may be
more forgiving than others (like WinNT) concerning mixed steppings.
The motherboard manufacturer can also give you information about
which mixed steppings (if any) are compatible with the motherboard.
It's always best to buy matched pairs or to find another chip with
the same S-Spec number. Although in many cases, finding an identical
chip is quite difficult.

http://www.intel.com/support/motherboards/server/sb/cs-022150.htm

Mixed steppings of processors are only supported with the following
paired combinations of A1 and A2 steppings of the Intel? Itanium? 2
Processor with up to 9 MB L3 cache, identified by the package S-Spec
numbers (see the Intel? Itanium? 2 Processor Identification and
Package Information table for details):

* SL7SD and SL8CY
* SL7ED and SL8CX
* SL7EC and SL8CW
* SL7EB and SL8CV
* SL87H and SL8CU
* SL7EF and SL8CZ

Intel? Server Platform SR870BH2 and Intel? Server Platform SR870BN4
fully supports mixing A1 and A2 stepping processors

http://h20000.www2.hp.com/bizsupport/TechSupport/Document.jsp?lang=en&cc=us&objectID=c01209579&jumpid=reg_R1002_USEN

While HP ProLiant servers support mixed AMD Opteron processor
steppings within the same letter step, HP is recommending that the
processor with the lowest stepping be installed in the Boot Strap
Processor (BSP) slot and that the System ROM be updated to the latest
available version. For example, an AMD Opteron 800 with Stepping C0
can be mixed with an AMD Opteron 800 with Stepping CG .

So displaying a non-fatal 'info' message about it would in fact be
rather wise. Nothing scary, but we'd like to be informed.

Thanks,

Ingo

2011-05-25 18:59:35

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Andi Kleen <[email protected]> wrote:

> On Wed, May 25, 2011 at 08:54:51AM +0200, Ingo Molnar wrote:
> >
> > > /* Index into per_cpu list: */
> > > u16 cpu_index;
> > >
> > > + /* CPU update signature */
> > > + u32 x86_cpu_update;
> >
> > This should be cpu_microcode_version instead. We already know its x86 so the
> > x86_ prefix is superfluous. 'cpu_update' is also rather ambigious and does not
>
> I followed the convention of the other fields in cpu_data, [...]

Look at the context diff above, it has 'cpu_index', so no, there was no
consistent convention to follow.

The fields are arguably pretty inconsistent in 'struct cpuinfo_x86',
but when adding new fields they should be sane.

> [...] but I'll drop the prefix.
>
> "cpu update" was the name that was suggested to me. Sorry if it's a
> bit vague. Apparently calling it microcode is not politically
> correct. I'll keep it for now. If you want to really change it
> please send another email.
>
> > So, during the course of developing this, did it occur to you
> > that other x86 CPUs should fill in this information as well?
>
> Yes, it did in fact, but I hope someone else will do that because I
> have no way to test it.

And not Cc:-ing anyone and not mentioning that the microcode driver
of other vendors needs to be updated was the right solution to raise
attention to that lack of means of testing? :-)

> > > - /* see notes above for revision 1.07. Apparent chip bug */
>
> This particular code pattern has no chip bug. The CPUID is required
> by the documentation! So whoever wrote it didn't read the
> documentation. So yes I dropped that obviously bogus comment.

And you thus 'obviously' forked away the reading of the microcode
version into another file, with the same 'obviously wrong' comment
left behind in another place?

Not very smart from you.

> > Why? If you think it's not a bug (but got documented meanwhile as
> > the official
>
> It always was documented this way.

FYI, the x86 microcode driver actually predates official public
documentation on the topic, so no, it was not always documented that
way ;-)

[ I suspect the CPUID was 'documented' after it was found out the
hard way that writing the MSR and reading it out without the CPUID
can give random junk as the version number. ]

> > way of updating the microcode and reading out the version) then
> > update the comments in a *separate* patch, and update the *other*
> > reference to it as well.
>
> I'm not sure about the other references. The documentation just
> states the CPUID is needed for reading the revision.
>
> Anyways I'll just leave the comment around for now.
>
> > > +
> > > + seq_printf(m, "\n");
> >
> > This too should say microcode version.
> >
> > Also, please move the field to the logical place, next to
> > "stepping:". The argument about parsers is bogus - anyone parsing
> > /proc/cpuinfo is not in a fastpath, full stop.
>
> The rationale was not cycles, but if someone was stupid enough to
> hardcode the line number offsets while parsing. You never know with
> user space /proc parsers and assuming the worst is always the best.
>
> I thought it was safest to add new fields at the end.

No, it's not a problem to add /proc/cpuinfo fields in the middle -
please add this new field to the logical place.

> > Also, the above sequence is rather suboptimal to begin with - we can and only
> > want to execute a *single* seq_printf() there.
>
> Sorry I don't understand the comment. Anyways as you say above
> it's no fast path, so it shouldn't matter.
>
> > Please factor out the reading of the microcode version - you have now created
> > two duplicated open-coded versions of it in cpu.c and microcode_intel.c, with
> > mismatching comments - not good.
>
> Huh? There's only a single one now.

That's not actually true. With your patches applied a trivial git
grep shows the two places reading the microcode version:

arch/x86/kernel/cpu/intel.c: wrmsr(MSR_IA32_UCODE_REV, 0, 0);
arch/x86/kernel/cpu/intel.c: rdmsr(MSR_IA32_UCODE_REV, lo, c->x86_cpu_update);
arch/x86/kernel/microcode_intel.c: wrmsr(MSR_IA32_UCODE_REV, 0, 0);
arch/x86/kernel/microcode_intel.c: rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);

As i said you have now created two duplicated open-coded versions of
it in cpu.c and microcode_intel.c, with mismatching comments - not
good.

Andi, are you being obtuse and are you wasting my time intentionally?
You could have checked this yourself.

Thanks,

Ingo

Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, 25 May 2011, Ingo Molnar wrote:
> So displaying a non-fatal 'info' message about it would in fact be
> rather wise. Nothing scary, but we'd like to be informed.

A printk with levels info or notice, I do agree to be a good idea. In fact,
I like the idea a lot.

WARN() and kernel tainting, which is what I understand was being proposed
before, I cannot agree with.

OTOH, IMO mismatched microcode should be grounds for more serious measures.
And that includes fixing the annoying userspace ABI that encourages bad
behaviour in the first place and makes it harder to do the proper thing.

--
"One disk to rule them all, One disk to find them. One disk to bring
them all and in the darkness grind them. In the Land of Redmond
where the shadows lie." -- The Silicon Valley Tarot
Henrique Holschuh

2011-05-25 19:06:08

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


I hadn't really expected this simple patchkit to generate
that much email :-)

> Many years ago i had an old P3 system where i mixed steppings and it
> was not stable, it would crash after i have put some load on the
> system.

Ok.

But it's not clear to me how to reliably do it for the microcode
at boot. Because a future microcode update could fix it up, but the kernel
can't know if there will be a microcode update or not.
So I don't think it can be done at boot at least.

If you want me to rewrite the microcode update driver or something
like that that's outside the scope of my patchkit, sorry.

-Andi

2011-05-25 19:14:07

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, May 25, 2011 at 08:59:12PM +0200, Ingo Molnar wrote:
> Look at the context diff above, it has 'cpu_index', so no, there was no
> consistent convention to follow.

Well all the CPU specific fields. Anyways I renamed it now.

> attention to that lack of means of testing? :-)
>
> > > > - /* see notes above for revision 1.07. Apparent chip bug */
> >
> > This particular code pattern has no chip bug. The CPUID is required
> > by the documentation! So whoever wrote it didn't read the
> > documentation. So yes I dropped that obviously bogus comment.
>
> And you thus 'obviously' forked away the reading of the microcode
> version into another file, with the same 'obviously wrong' comment
> left behind in another place?

I just wrote new code with correct comments.

> > It always was documented this way.
>
> FYI, the x86 microcode driver actually predates official public

Are you sure you're not confusing that with the AMD driver?
AFAIK Intel was always documented.


> No, it's not a problem to add /proc/cpuinfo fields in the middle -
> please add this new field to the logical place.

Ok.

> >
> > Huh? There's only a single one now.
>
> That's not actually true. With your patches applied a trivial git
> grep shows the two places reading the microcode version:

Ok you count the re-reading. Fair enough. I guess I can remove
the comment there too.

BTW before my patches there were four places, I collapsed it down
to two if you count that.

-Andi

2011-05-25 19:36:20

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Henrique de Moraes Holschuh <[email protected]> wrote:

> On Wed, 25 May 2011, Ingo Molnar wrote:
> >
> > So displaying a non-fatal 'info' message about it would in fact
> > be rather wise. Nothing scary, but we'd like to be informed.
>
> A printk with levels info or notice, I do agree to be a good idea.
> In fact, I like the idea a lot.
>
> WARN() and kernel tainting, which is what I understand was being
> proposed before, I cannot agree with.

Yeah, the taint is probably overdoing it - especially considering
that mixing certain steppings is explicitly supported.

> OTOH, IMO mismatched microcode should be grounds for more serious
> measures. And that includes fixing the annoying userspace ABI that
> encourages bad behaviour in the first place and makes it harder to
> do the proper thing.

Well, a problem the microcode driver has is that it needs that
microcode blob. So we cannot really load it right at bootup (unless
we promote the image to the initrd - somewhat hairy).

Thanks,

Ingo

2011-05-25 19:45:16

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision


* Andi Kleen <[email protected]> wrote:

> I hadn't really expected this simple patchkit to generate
> that much email :-)
>
> > Many years ago i had an old P3 system where i mixed steppings and
> > it was not stable, it would crash after i have put some load on
> > the system.
>
> Ok.
>
> But it's not clear to me how to reliably do it for the microcode at
> boot. Because a future microcode update could fix it up, but the
> kernel can't know if there will be a microcode update or not. So I
> don't think it can be done at boot at least.

Well, my suggestion was mainly about stepping mismatches, and
microcode updates do not generally change the stepping.

If the steppings are the same and the microcode version is not the
same, doesn't that at least indicate some potential BIOS badness?
Most BIOSes will load a certain version of the microcode straight
away, and do that on all CPUs.

So warning about that might be useful as well.

But yeah, i could be wrong about this, if there's many false
positives we can tone it down or remove it altogether - right now we
simply do not know. We could float it a bit and see what happens,
it's not like these kinds of checks are expensive.

> If you want me to rewrite the microcode update driver or something
> like that that's outside the scope of my patchkit, sorry.

No, i'd be perfectly happy if you began sending obviously correct
small patches already, without me having to baby-sit you through
trivial cleanliness issues *every* *single* *time*. You are a very
maintenance-intense contributor with an attitude right now. If that
situation improves we can do more complex things.

Thanks,

Ingo

2011-05-25 21:08:17

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On Wed, May 25, 2011 at 07:28:52AM -0400, Ingo Molnar wrote:
>
> * Borislav Petkov <[email protected]> wrote:
>
> > Btw, can we dump the ucode version in hex since ours are much easier to
> > read that way:
> >
> > [86483.770976] microcode: CPU0: patch_level=0x010000c4
> > [86483.826987] microcode: CPU1: patch_level=0x010000c4
> > [86483.835071] microcode: CPU2: patch_level=0x010000c4
> > ...
>
> How is that version constructed and iterated, or example is the
> 0x01000000 bit always set?
>
> If it's always set then it might make sense to turn this into a more
> human-readable version number: mask out the 0x01000000 and report
> 0xc4 as 194? Or is the *real* version above just '4'?
>
> Should 0x010000c4 perhaps be printed as 1.10.4?

Nah, splitting this doesn't give you any information we could use,
besides this format is the only format our ucode nomenclature uses so
having it different in Linux might cause confusion.

>
> > I guess for Intel the ucode version format won't matter that much.
>
> Well, if Intel does similar encodings as AMD,

I hardly doubt that.

> then it would be nice to turn that into human-readable version strings
> as well.

I think leaving it as a hex number would fit both vendors adequately.

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-29 10:22:56

by Jan Ceuleers

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86, intel: Output microcode revision

On 25/05/11 01:03, Andi Kleen wrote:
> From: Andi Kleen<[email protected]>
>
> I got a request to make it easier to determine the microcode update level
> on Intel CPUs. This patch adds a new "cpu update" field to /proc/cpuinfo,
> which I added at the end to minimize impact on parsers.
>
> The update level is also outputed on fatal machine checks together
> with the other CPUID model information.


s/outputed/output/

Jan