2023-12-06 17:29:15

by Jann Horn

[permalink] [raw]
Subject: [PATCH] x86/microcode: Be more verbose, especially about loading errors

The AMD ucode loader contains several checks for corrupted ucode blobs that
only log with pr_debug(); make them pr_err(), corrupted ucode blobs are
bad.

Also make both microcode loaders a bit more verbose about whether they
found ucode blobs at all and whether they found ucode for the specific CPU.

Signed-off-by: Jann Horn <[email protected]>
---
compile-tested only since I don't have an easy setup for booting test
kernels on bare metal

arch/x86/kernel/cpu/microcode/amd.c | 34 +++++++++++++++++----------
arch/x86/kernel/cpu/microcode/intel.c | 24 ++++++++++++++++---
2 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 13b45b9c806d..2312ddb031b5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -136,13 +136,13 @@ static bool verify_container(const u8 *buf, size_t buf_size)
u32 cont_magic;

if (buf_size <= CONTAINER_HDR_SZ) {
- pr_debug("Truncated microcode container header.\n");
+ pr_err("Truncated microcode container header.\n");
return false;
}

cont_magic = *(const u32 *)buf;
if (cont_magic != UCODE_MAGIC) {
- pr_debug("Invalid magic value (0x%08x).\n", cont_magic);
+ pr_err("Invalid magic value (0x%08x).\n", cont_magic);
return false;
}

@@ -163,7 +163,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size)

cont_type = hdr[1];
if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) {
- pr_debug("Wrong microcode container equivalence table type: %u.\n",
+ pr_err("Wrong microcode container equivalence table type: %u.\n",
cont_type);
return false;
}
@@ -173,7 +173,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size)
equiv_tbl_len = hdr[2];
if (equiv_tbl_len < sizeof(struct equiv_cpu_entry) ||
buf_size < equiv_tbl_len) {
- pr_debug("Truncated equivalence table.\n");
+ pr_err("Truncated equivalence table.\n");
return false;
}

@@ -194,7 +194,7 @@ __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize)
const u32 *hdr;

if (buf_size < SECTION_HDR_SIZE) {
- pr_debug("Truncated patch section.\n");
+ pr_err("Truncated patch section.\n");
return false;
}

@@ -203,13 +203,13 @@ __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize)
p_size = hdr[1];

if (p_type != UCODE_UCODE_TYPE) {
- pr_debug("Invalid type field (0x%x) in container file section header.\n",
+ pr_err("Invalid type field (0x%x) in container file section header.\n",
p_type);
return false;
}

if (p_size < sizeof(struct microcode_header_amd)) {
- pr_debug("Patch of size %u too short.\n", p_size);
+ pr_err("Patch of size %u too short.\n", p_size);
return false;
}

@@ -284,13 +284,13 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size)
* size sh_psize, as the section claims.
*/
if (buf_size < sh_psize) {
- pr_debug("Patch of size %u truncated.\n", sh_psize);
+ pr_err("Patch of size %u truncated.\n", sh_psize);
return -1;
}

ret = __verify_patch_size(family, sh_psize, buf_size);
if (!ret) {
- pr_debug("Per-family patch size mismatch.\n");
+ pr_err("Per-family patch size mismatch.\n");
return -1;
}

@@ -423,8 +423,10 @@ static int __apply_microcode_amd(struct microcode_amd *mc)

/* verify patch application was successful */
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (rev != mc->hdr.patch_id)
+ if (rev != mc->hdr.patch_id) {
+ pr_err("CPU did not accept microcode update!\n");
return -1;
+ }

return 0;
}
@@ -451,14 +453,18 @@ static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, siz
scan_containers(ucode, size, &desc);

mc = desc.mc;
- if (!mc)
+ if (!mc) {
+ pr_info("Found no patches for this CPU model in the microcode file\n");
return ret;
+ }

/*
* Allow application of the same revision to pick up SMT-specific
* changes even if the revision of the other SMT thread is already
* up-to-date.
*/
+ pr_info("CPU had microcode revision 0x%x, latest available patch is 0x%x\n",
+ old_rev, mc->hdr.patch_id);
if (old_rev > mc->hdr.patch_id)
return ret;

@@ -507,8 +513,10 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_
ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax;

find_blobs_in_containers(cpuid_1_eax, &cp);
- if (!(cp.data && cp.size))
+ if (!(cp.data && cp.size)) {
+ pr_info("Unable to find any microcode blob for early loading\n");
return;
+ }

if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size))
native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
@@ -883,7 +891,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);

if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
- pr_debug("failed to load file %s\n", fw_name);
+ pr_err("failed to load file %s\n", fw_name);
goto out;
}

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 070426b9895f..c56819dc6730 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -266,6 +266,7 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
struct microcode_intel *patch = NULL;
u32 cur_rev = uci->cpu_sig.rev;
unsigned int mc_size;
+ bool any_matches = false;

for (; size >= sizeof(struct microcode_header_intel); size -= mc_size, data += mc_size) {
mc_header = (struct microcode_header_intel *)data;
@@ -277,6 +278,7 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,

if (!intel_find_matching_signature(data, &uci->cpu_sig))
continue;
+ any_matches = true;

/*
* For saving the early microcode, find the matching revision which
@@ -296,7 +298,21 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
cur_rev = mc_header->rev;
}

- return size ? NULL : patch;
+ if (size) {
+ pr_err("Unable to parse microcode blob!\n");
+ return NULL;
+ }
+
+ if (!save) {
+ if (patch)
+ pr_info("Found microcode update\n");
+ else if (any_matches)
+ pr_info("Found microcode update but it's not newer\n");
+ else
+ pr_info("Found no microcode update for this CPU\n");
+ }
+
+ return patch;
}

static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
@@ -373,8 +389,10 @@ static __init struct microcode_intel *get_microcode_blob(struct ucode_cpu_info *
if (!load_builtin_intel_microcode(&cp))
cp = find_microcode_in_initrd(ucode_path);

- if (!(cp.data && cp.size))
+ if (!(cp.data && cp.size)) {
+ pr_info("Unable to find any microcode blob for early loading\n");
return NULL;
+ }

intel_collect_cpu_info(&uci->cpu_sig);

@@ -612,7 +630,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
c->x86, c->x86_model, c->x86_stepping);

if (request_firmware_direct(&firmware, name, device)) {
- pr_debug("data file %s load failed\n", name);
+ pr_info("data file %s load failed\n", name);
return UCODE_NFOUND;
}


base-commit: bee0e7762ad2c6025b9f5245c040fcc36ef2bde8
--
2.43.0.472.g3155946c3a-goog


2023-12-06 19:58:43

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/microcode: Be more verbose, especially about loading errors

On Wed, Dec 06, 2023 at 06:28:44PM +0100, Jann Horn wrote:
> The AMD ucode loader contains several checks for corrupted ucode blobs that
> only log with pr_debug(); make them pr_err(), corrupted ucode blobs are
> bad.
>
> Also make both microcode loaders a bit more verbose about whether they
> found ucode blobs at all and whether they found ucode for the specific CPU.

So far, so good.

The only thing I'm missing here is the *why*.

There's merit in not complaining about corrupted microcode blobs because
they won't be loaded anyway: no harm, no foul.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2023-12-06 20:24:40

by Jann Horn

[permalink] [raw]
Subject: Re: [PATCH] x86/microcode: Be more verbose, especially about loading errors

On Wed, Dec 6, 2023 at 8:58 PM Borislav Petkov <[email protected]> wrote:
> On Wed, Dec 06, 2023 at 06:28:44PM +0100, Jann Horn wrote:
> > The AMD ucode loader contains several checks for corrupted ucode blobs that
> > only log with pr_debug(); make them pr_err(), corrupted ucode blobs are
> > bad.
> >
> > Also make both microcode loaders a bit more verbose about whether they
> > found ucode blobs at all and whether they found ucode for the specific CPU.
>
> So far, so good.
>
> The only thing I'm missing here is the *why*.
>
> There's merit in not complaining about corrupted microcode blobs because
> they won't be loaded anyway: no harm, no foul.

Well, yes, except that if no microcode blob is loaded, you're not
gonna have the errata fixes and/or security mitigations that you might
expect to have.

2023-12-06 20:54:03

by Jann Horn

[permalink] [raw]
Subject: Re: [PATCH] x86/microcode: Be more verbose, especially about loading errors

On Wed, Dec 6, 2023 at 9:32 PM Borislav Petkov <[email protected]> wrote:
>
> On Wed, Dec 06, 2023 at 09:23:48PM +0100, Jann Horn wrote:
> > Well, yes, except that if no microcode blob is loaded, you're not
> > gonna have the errata fixes and/or security mitigations that you might
> > expect to have.
>
> We say that too:
>
> microcode: Current revision: 0x000000f0
> microcode: Updated early from: 0x000000be
>
> That second line would be missing.

Ah, right. I guess that's decent for diagnostics, though I think it
would be nice to have a more explicit message about not finding a
microcode update, since otherwise you'd have to read the kernel
sources to figure out that you have to check for a missing second
line.

> Therefore, the mitigation fixes all report that too. Look for
> "[Mm]icrocode" in the mitigation strings in arch/x86/kernel/cpu/bugs.c.

Yeah, fair, I guess that's a fairly visible indicator that something's
wrong with microcode. (Though it doesn't tell you whether your
microcode is just outdated or you have no microcode for the CPU
family.)

Well, I don't really feel particularly attached to this patch.

2023-12-07 15:35:21

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/microcode: Be more verbose, especially about loading errors

On Wed, Dec 06, 2023 at 09:51:58PM +0100, Jann Horn wrote:
> Ah, right. I guess that's decent for diagnostics, though I think it
> would be nice to have a more explicit message about not finding a
> microcode update, since otherwise you'd have to read the kernel
> sources to figure out that you have to check for a missing second
> line.

I keep preaching scripting something around:

$ grep microcode /proc/cpuinfo | sort | uniq -c
16 microcode : 0x800820d

We also have:

$ grep -r . /sys/devices/system/cpu/vulnerabilities/
/sys/devices/system/cpu/vulnerabilities/spectre_v2:Mitigation: Retpolines, IBPB: conditional, STIBP: disabled, RSB filling, PBRSB-eIBRS: Not affected
...

You could make sure that this says "no microcode" in the microcode
missing case or so and then grep that on large fleets, massage results
and dig into dmesg only on those which fail. Hopefully a small number.

> Yeah, fair, I guess that's a fairly visible indicator that something's
> wrong with microcode. (Though it doesn't tell you whether your
> microcode is just outdated or you have no microcode for the CPU
> family.)

So what you could do on AMD (probably similar on Intel too) is to fetch

git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git

scan the microcode blobs in amd-ucode/

==========================================================
| inst_cpu | err_msk | err_cmp | eq_cpu | res |
==========================================================
| 0x00800f82 | 0x00000000 | 0x00000000 | 0x8082 | 0x0000 |
| 0x00830f10 | 0x00000000 | 0x00000000 | 0x8310 | 0x0000 |
| 0x008a0f00 | 0x00000000 | 0x00000000 | 0x8a00 | 0x0000 |
| 0x00800f12 | 0x00000000 | 0x00000000 | 0x8012 | 0x0000 |
| 0x00000000 | 0x00000000 | 0x00000000 | 0x0000 | 0x0000 |
----------------------------------------------------------
ID: 0x0800820d, CPU rev ID: 0x00008082
ID: 0x0830107b, CPU rev ID: 0x00008310
ID: 0x08a00008, CPU rev ID: 0x00008a00
ID: 0x0800126e, CPU rev ID: 0x00008012


ID are the latest released patch revisions. You then compare them to the
revision on the respective machine using that CPU rev ID which can be
computed from CPUID(1).EAX (inst_cpu) using the table above. Table is
also in the blob.

In the example above my workstation has microcode revision 0x800820d and
CPUID(1).EAX on it is 0x00800f82.

So all up to date. :-)

See, easy peasy. :-P

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2023-12-07 15:55:47

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/microcode: Be more verbose, especially about loading errors

On Wed, Dec 06, 2023 at 09:23:48PM +0100, Jann Horn wrote:
> Well, yes, except that if no microcode blob is loaded, you're not
> gonna have the errata fixes and/or security mitigations that you might
> expect to have.

We say that too:

microcode: Current revision: 0x000000f0
microcode: Updated early from: 0x000000be

That second line would be missing.

And updated microcode != microcode version for all the mitigations is
even available.

Therefore, the mitigation fixes all report that too. Look for
"[Mm]icrocode" in the mitigation strings in arch/x86/kernel/cpu/bugs.c.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette