2019-08-23 08:15:54

by Mihai Carabas

[permalink] [raw]
Subject: [PATCH] x86/microcode: Update microcode for all cores in parallel

From: Ashok Raj <[email protected]>

Microcode update was changed to be serialized due to restrictions after
Spectre days. Updating serially on a large multi-socket system can be
painful since we do this one CPU at a time. Cloud customers have expressed
discontent as services disappear for a prolonged time. The restriction is
that only one core goes through the update while other cores are quiesced.
The update is now done only on the first thread of each core while other
siblings simply wait for this to complete.

Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
---
arch/x86/kernel/cpu/microcode/core.c | 44 ++++++++++++++++++++++++-----------
arch/x86/kernel/cpu/microcode/intel.c | 14 ++++-------
2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index cb0fdca..577b223 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -63,11 +63,6 @@
*/
static DEFINE_MUTEX(microcode_mutex);

-/*
- * Serialize late loading so that CPUs get updated one-by-one.
- */
-static DEFINE_RAW_SPINLOCK(update_lock);
-
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
@@ -566,9 +561,23 @@ static int __reload_late(void *info)
if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
return -1;

- raw_spin_lock(&update_lock);
- apply_microcode_local(&err);
- raw_spin_unlock(&update_lock);
+ /*
+ * Update just on the first CPU in the core. Other siblings
+ * get the update automatically according to Intel SDM 9.11.6.3
+ * Update in a System Supporting Intel Hyper-Threading Technology
+ * Intel Hyper-Threading Technology has implications on the loading of the
+ * microcode update. The update must be loaded for each core in a physical
+ * processor. Thus, for a processor supporting Intel Hyper-Threading
+ * Technology, only one logical processor per core is required to load the
+ * microcode update. Each individual logical processor can independently
+ * load the update. However, MP initialization must provide some mechanism
+ * (e.g. a software semaphore) to force serialization of microcode update
+ * loads and to prevent simultaneous load attempts to the same core.
+ */
+ if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
+ apply_microcode_local(&err);
+ else
+ goto wait_for_siblings;

/* siblings return UCODE_OK because their engine got updated already */
if (err > UCODE_NFOUND) {
@@ -578,15 +587,24 @@ static int __reload_late(void *info)
ret = 1;
}

+wait_for_siblings:
/*
- * Increase the wait timeout to a safe value here since we're
- * serializing the microcode update and that could take a while on a
- * large number of CPUs. And that is fine as the *actual* timeout will
- * be determined by the last CPU finished updating and thus cut short.
+ * Since we are doing all cores in parallel, and the other
+ * sibling threads just do a rev update, there is no need to
+ * increase the timeout
*/
- if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
panic("Timeout during microcode update!\n");

+ /*
+ * At least one thread has completed update in each core.
+ * For others, simply call the update to make sure the
+ * per-cpu cpuinfo can be updated with right microcode
+ * revision.
+ */
+ if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
+ apply_microcode_local(&err);
+
return ret;
}

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce799cf..884d02d 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -793,7 +793,6 @@ static enum ucode_state apply_microcode_intel(int cpu)
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
enum ucode_state ret;
- static int prev_rev;
u32 rev;

/* We should bind the task to the CPU */
@@ -836,14 +835,11 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_ERROR;
}

- if (rev != prev_rev) {
- pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
- rev,
- mc->hdr.date & 0xffff,
- mc->hdr.date >> 24,
- (mc->hdr.date >> 16) & 0xff);
- prev_rev = rev;
- }
+ pr_info_once("updated to revision 0x%x, date = %04x-%02x-%02x\n",
+ rev,
+ mc->hdr.date & 0xffff,
+ mc->hdr.date >> 24,
+ (mc->hdr.date >> 16) & 0xff);

ret = UCODE_UPDATED;

--
1.8.3.1


2019-08-24 08:54:13

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/microcode: Update microcode for all cores in parallel

On Thu, Aug 22, 2019 at 11:43:47PM +0300, Mihai Carabas wrote:
> From: Ashok Raj <[email protected]>
>
> Microcode update was changed to be serialized due to restrictions after
> Spectre days. Updating serially on a large multi-socket system can be
> painful since we do this one CPU at a time. Cloud customers have expressed
> discontent as services disappear for a prolonged time. The restriction is
> that only one core goes through the update while other cores are quiesced.
> The update is now done only on the first thread of each core while other
> siblings simply wait for this to complete.
>
> Signed-off-by: Ashok Raj <[email protected]>
> Signed-off-by: Mihai Carabas <[email protected]>
> ---
> arch/x86/kernel/cpu/microcode/core.c | 44 ++++++++++++++++++++++++-----------
> arch/x86/kernel/cpu/microcode/intel.c | 14 ++++-------
> 2 files changed, 36 insertions(+), 22 deletions(-)

Thanks, I did some cleaning up and smoke testing. It looks ok so far.
The versions I'm going to hammer on more follow as a reply to this
message - I'd appreciate it if you tested them too, when possible.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-24 08:54:39

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 1/2] x86/microcode: Update late microcode in parallel

From: Ashok Raj <[email protected]>
Date: Thu, 22 Aug 2019 23:43:47 +0300

Microcode update was changed to be serialized due to restrictions after
Spectre days. Updating serially on a large multi-socket system can be
painful since it is being done on one CPU at a time.

Cloud customers have expressed discontent as services disappear for a
prolonged time. The restriction is that only one core goes through the
update while other cores are quiesced.

Do the microcode update only on the first thread of each core while
other siblings simply wait for this to complete.

[ bp: Simplify, massage, cleanup comments. ]

Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jon Grimm <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Thomas Gleixner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: x86-ml <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/microcode/core.c | 36 ++++++++++++++++------------
1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index cb0fdcaf1415..7019d4b2df0c 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -63,11 +63,6 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);

-/*
- * Serialize late loading so that CPUs get updated one-by-one.
- */
-static DEFINE_RAW_SPINLOCK(update_lock);
-
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
@@ -566,11 +561,18 @@ static int __reload_late(void *info)
if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
return -1;

- raw_spin_lock(&update_lock);
- apply_microcode_local(&err);
- raw_spin_unlock(&update_lock);
+ /*
+ * On an SMT system, it suffices to load the microcode on one sibling of
+ * the core because the microcode engine is shared between the threads.
+ * Synchronization still needs to take place so that no concurrent
+ * loading attempts happen on multiple threads of an SMT core. See
+ * below.
+ */
+ if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
+ apply_microcode_local(&err);
+ else
+ goto wait_for_siblings;

- /* siblings return UCODE_OK because their engine got updated already */
if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
ret = -1;
@@ -578,14 +580,18 @@ static int __reload_late(void *info)
ret = 1;
}

+wait_for_siblings:
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
+ panic("Timeout during microcode update!\n");
+
/*
- * Increase the wait timeout to a safe value here since we're
- * serializing the microcode update and that could take a while on a
- * large number of CPUs. And that is fine as the *actual* timeout will
- * be determined by the last CPU finished updating and thus cut short.
+ * At least one thread has completed update on each core.
+ * For others, simply call the update to make sure the
+ * per-cpu cpuinfo can be updated with right microcode
+ * revision.
*/
- if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
- panic("Timeout during microcode update!\n");
+ if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
+ apply_microcode_local(&err);

return ret;
}
--
2.21.0

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-24 08:55:47

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 2/2] x86/microcode/intel: Issue the revision updated message only on the BSP


From: Borislav Petkov <[email protected]>
Date: Sat, 24 Aug 2019 10:01:53 +0200

... in order to not pollute dmesg with a line for each updated microcode
engine.

Signed-off-by: Borislav Petkov <[email protected]>
Cc: Ashok Raj <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jon Grimm <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Mihai Carabas <[email protected]>
Cc: [email protected]
Cc: Thomas Gleixner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: x86-ml <[email protected]>
---
arch/x86/kernel/cpu/microcode/intel.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce799cfe9434..6a99535d7f37 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -791,6 +791,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
struct microcode_intel *mc;
enum ucode_state ret;
static int prev_rev;
@@ -836,7 +837,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_ERROR;
}

- if (rev != prev_rev) {
+ if (bsp && rev != prev_rev) {
pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
rev,
mc->hdr.date & 0xffff,
@@ -852,7 +853,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
c->microcode = rev;

/* Update boot_cpu_data's revision too, if we're on the BSP: */
- if (c->cpu_index == boot_cpu_data.cpu_index)
+ if (bsp)
boot_cpu_data.microcode = rev;

return ret;
--
2.21.0


--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-26 13:28:05

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Mon, Aug 26, 2019 at 08:53:05AM -0400, Boris Ostrovsky wrote:
> What is the advantage of having those other threads go through
> find_patch() and (in Intel case) intel_get_microcode_revision() (which
> involves two MSR accesses) vs. having the master sibling update slaves'
> microcode revisions? There are only two things that need to be updated,
> uci->cpu_sig.rev and c->microcode.

Less code churn and simplicity.

I accept non-ugly patches, of course. :-)

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-26 13:38:37

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/microcode/intel: Issue the revision updated message only on the BSP

On 8/24/19 4:53 AM, Borislav Petkov wrote:
> From: Borislav Petkov <[email protected]>
> Date: Sat, 24 Aug 2019 10:01:53 +0200
>
> ... in order to not pollute dmesg with a line for each updated microcode
> engine.
>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: Ashok Raj <[email protected]>
> Cc: Boris Ostrovsky <[email protected]>
> Cc: "H. Peter Anvin" <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Jon Grimm <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: Mihai Carabas <[email protected]>
> Cc: [email protected]
> Cc: Thomas Gleixner <[email protected]>
> Cc: Tom Lendacky <[email protected]>
> Cc: x86-ml <[email protected]>
> ---
> arch/x86/kernel/cpu/microcode/intel.c | 5 +++--


AMD too?


-boris

2019-08-26 13:49:01

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/microcode/intel: Issue the revision updated message only on the BSP

On Mon, Aug 26, 2019 at 09:34:01AM -0400, Boris Ostrovsky wrote:
> AMD too?

We're not doing this output shortening on AMD at all. As before,
non-ugly patches are welcome. :)

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-26 15:10:05

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On 8/24/19 4:53 AM, Borislav Petkov wrote:
>
> +wait_for_siblings:
> + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
> + panic("Timeout during microcode update!\n");
> +
> /*
> - * Increase the wait timeout to a safe value here since we're
> - * serializing the microcode update and that could take a while on a
> - * large number of CPUs. And that is fine as the *actual* timeout will
> - * be determined by the last CPU finished updating and thus cut short.
> + * At least one thread has completed update on each core.
> + * For others, simply call the update to make sure the
> + * per-cpu cpuinfo can be updated with right microcode
> + * revision.


What is the advantage of having those other threads go through
find_patch() and (in Intel case) intel_get_microcode_revision() (which
involves two MSR accesses) vs. having the master sibling update slaves'
microcode revisions? There are only two things that need to be updated,
uci->cpu_sig.rev and c->microcode.

-boris


> */
> - if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
> - panic("Timeout during microcode update!\n");
> + if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
> + apply_microcode_local(&err);
>
> return ret;
> }

2019-08-26 20:48:28

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

Hi Boris

Minor nit: Small commit log fixup below.

On Sat, Aug 24, 2019 at 10:53:00AM +0200, Borislav Petkov wrote:
> From: Ashok Raj <[email protected]>
> Date: Thu, 22 Aug 2019 23:43:47 +0300
>
> Microcode update was changed to be serialized due to restrictions after
> Spectre days. Updating serially on a large multi-socket system can be
> painful since it is being done on one CPU at a time.
>
> Cloud customers have expressed discontent as services disappear for a
> prolonged time. The restriction is that only one core goes through the
s/one core/one thread of a core/

> update while other cores are quiesced.
s/cores/other thread(s) of the core

>
> Do the microcode update only on the first thread of each core while
> other siblings simply wait for this to complete.
>
> [ bp: Simplify, massage, cleanup comments. ]
>

Cheers,
Ashok

2019-08-26 20:48:56

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Mon, Aug 26, 2019 at 08:53:05AM -0400, Boris Ostrovsky wrote:
> On 8/24/19 4:53 AM, Borislav Petkov wrote:
> >
> > +wait_for_siblings:
> > + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
> > + panic("Timeout during microcode update!\n");
> > +
> > /*
> > - * Increase the wait timeout to a safe value here since we're
> > - * serializing the microcode update and that could take a while on a
> > - * large number of CPUs. And that is fine as the *actual* timeout will
> > - * be determined by the last CPU finished updating and thus cut short.
> > + * At least one thread has completed update on each core.
> > + * For others, simply call the update to make sure the
> > + * per-cpu cpuinfo can be updated with right microcode
> > + * revision.
>
>
> What is the advantage of having those other threads go through
> find_patch() and (in Intel case) intel_get_microcode_revision() (which
> involves two MSR accesses) vs. having the master sibling update slaves'
> microcode revisions? There are only two things that need to be updated,
> uci->cpu_sig.rev and c->microcode.
>

True, yes we could do that. But there is some warm and comfy feeling
that you really read the revision from the thread local copy of the revision
and it matches what was updated in the other thread sibling rather than
just hardcoding the fixups. The code looks clean in the sense it looks like
you are attempting to upgrade but the new revision is reflected correctly
and you skip the update.

But if you feel compelled, I'm not opposed to it as long as Boris is
happy with the changes :-).

Cheers,
Ashok



2019-08-27 12:26:12

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Mon, Aug 26, 2019 at 01:23:39PM -0700, Raj, Ashok wrote:
> > Cloud customers have expressed discontent as services disappear for a
> > prolonged time. The restriction is that only one core goes through the
> s/one core/one thread of a core/
>
> > update while other cores are quiesced.
> s/cores/other thread(s) of the core

Changed it to:

"Cloud customers have expressed discontent as services disappear for
a prolonged time. The restriction is that only one core (or only one
thread of a core in the case of an SMT system) goes through the update
while other cores (or respectively, SMT threads) are quiesced."

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-27 19:43:07

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Mon, Aug 26, 2019 at 01:32:48PM -0700, Raj, Ashok wrote:
> On Mon, Aug 26, 2019 at 08:53:05AM -0400, Boris Ostrovsky wrote:
> > On 8/24/19 4:53 AM, Borislav Petkov wrote:
> > >
> > > +wait_for_siblings:
> > > + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
> > > + panic("Timeout during microcode update!\n");
> > > +
> > > /*
> > > - * Increase the wait timeout to a safe value here since we're
> > > - * serializing the microcode update and that could take a while on a
> > > - * large number of CPUs. And that is fine as the *actual* timeout will
> > > - * be determined by the last CPU finished updating and thus cut short.
> > > + * At least one thread has completed update on each core.
> > > + * For others, simply call the update to make sure the
> > > + * per-cpu cpuinfo can be updated with right microcode
> > > + * revision.
> >
> >
> > What is the advantage of having those other threads go through
> > find_patch() and (in Intel case) intel_get_microcode_revision() (which
> > involves two MSR accesses) vs. having the master sibling update slaves'
> > microcode revisions? There are only two things that need to be updated,
> > uci->cpu_sig.rev and c->microcode.
> >
>
> True, yes we could do that. But there is some warm and comfy feeling
> that you really read the revision from the thread local copy of the revision
> and it matches what was updated in the other thread sibling rather than
> just hardcoding the fixups. The code looks clean in the sense it looks like
> you are attempting to upgrade but the new revision is reflected correctly
> and you skip the update.
>
> But if you feel compelled, I'm not opposed to it as long as Boris is
> happy with the changes :-).



Something like this. I only lightly tested this but if there is interest
I can test it more.



From: Boris Ostrovsky <[email protected]>
Date: Tue, 27 Aug 2019 08:26:08 -0400
Subject: [PATCH 2/2] x86/microcode: Have master sibling update slaves' data

Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/kernel/cpu/microcode/amd.c | 21 ++++--------------
arch/x86/kernel/cpu/microcode/core.c | 40 ++++++++++++++++++++---------------
arch/x86/kernel/cpu/microcode/intel.c | 18 +++-------------
3 files changed, 30 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index a0e52bd..a365f72 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -668,11 +668,9 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)

static enum ucode_state apply_microcode_amd(int cpu)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
- enum ucode_state ret;
u32 rev, dummy;

BUG_ON(raw_smp_processor_id() != cpu);
@@ -689,10 +687,8 @@ static enum ucode_state apply_microcode_amd(int cpu)
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);

/* need to apply patch? */
- if (rev >= mc_amd->hdr.patch_id) {
- ret = UCODE_OK;
- goto out;
- }
+ if (rev >= mc_amd->hdr.patch_id)
+ return UCODE_OK;

if (__apply_microcode_amd(mc_amd)) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
@@ -700,20 +696,11 @@ static enum ucode_state apply_microcode_amd(int cpu)
return UCODE_ERROR;
}

- rev = mc_amd->hdr.patch_id;
- ret = UCODE_UPDATED;
-
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
-
-out:
uci->cpu_sig.rev = rev;
- c->microcode = rev;

- /* Update boot_cpu_data's revision too, if we're on the BSP: */
- if (c->cpu_index == boot_cpu_data.cpu_index)
- boot_cpu_data.microcode = rev;
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);

- return ret;
+ return UCODE_UPDATED;
}

static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 7019d4b..09643c6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -551,6 +551,8 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
static int __reload_late(void *info)
{
int cpu = smp_processor_id();
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = (c->cpu_index == boot_cpu_data.cpu_index);
enum ucode_state err;
int ret = 0;

@@ -568,30 +570,34 @@ static int __reload_late(void *info)
* loading attempts happen on multiple threads of an SMT core. See
* below.
*/
- if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
+ if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu) {
apply_microcode_local(&err);
- else
- goto wait_for_siblings;

- if (err > UCODE_NFOUND) {
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
- ret = -1;
- } else if (err == UCODE_UPDATED || err == UCODE_OK) {
- ret = 1;
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ ret = -1;
+ } else if (err == UCODE_UPDATED || err == UCODE_OK) {
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ int sibling, rev = uci->cpu_sig.rev;
+
+ /* All siblings have same revision */
+ for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
+ c = &cpu_data(sibling);
+ uci = ucode_cpu_info + sibling;
+
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ }
+
+ ret = 1;
+ }
}

-wait_for_siblings:
if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
panic("Timeout during microcode update!\n");

- /*
- * At least one thread has completed update on each core.
- * For others, simply call the update to make sure the
- * per-cpu cpuinfo can be updated with right microcode
- * revision.
- */
- if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
- apply_microcode_local(&err);
+ if (bsp)
+ boot_cpu_data.microcode = c->microcode;

return ret;
}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce799cf..04e9aa2 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -790,9 +790,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
static enum ucode_state apply_microcode_intel(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
- enum ucode_state ret;
static int prev_rev;
u32 rev;

@@ -814,10 +812,8 @@ static enum ucode_state apply_microcode_intel(int cpu)
* already.
*/
rev = intel_get_microcode_revision();
- if (rev >= mc->hdr.rev) {
- ret = UCODE_OK;
- goto out;
- }
+ if (rev >= mc->hdr.rev)
+ return UCODE_OK;

/*
* Writeback and invalidate caches before updating microcode to avoid
@@ -845,17 +841,9 @@ static enum ucode_state apply_microcode_intel(int cpu)
prev_rev = rev;
}

- ret = UCODE_UPDATED;
-
-out:
uci->cpu_sig.rev = rev;
- c->microcode = rev;
-
- /* Update boot_cpu_data's revision too, if we're on the BSP: */
- if (c->cpu_index == boot_cpu_data.cpu_index)
- boot_cpu_data.microcode = rev;

- return ret;
+ return UCODE_UPDATED;
}

static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
--
1.8.3.1



2019-08-27 21:25:29

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On 8/27/19 3:43 PM, Boris Ostrovsky wrote:
>
>
> Something like this. I only lightly tested this but if there is interest
> I can test it more.

This was a bit too aggressive with changes to arch-specific code, only
changes to __reload_late() would be needed.


-boris


2019-08-28 00:57:50

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Tue, Aug 27, 2019 at 02:25:01PM +0200, Borislav Petkov wrote:
> On Mon, Aug 26, 2019 at 01:23:39PM -0700, Raj, Ashok wrote:
> > > Cloud customers have expressed discontent as services disappear for a
> > > prolonged time. The restriction is that only one core goes through the
> > s/one core/one thread of a core/
> >
> > > update while other cores are quiesced.
> > s/cores/other thread(s) of the core
>
> Changed it to:
>
> "Cloud customers have expressed discontent as services disappear for
> a prolonged time. The restriction is that only one core (or only one
> thread of a core in the case of an SMT system) goes through the update
> while other cores (or respectively, SMT threads) are quiesced."

the last line seems to imply that only one core can be updated at a time.
But the only requirement is on a per-core basis, the HT thread sharing
a core must be quiesced while the microcode update is performed.

for ex. if you have 2 cores, you can update microcode on both cores
at the same time.

C0T0, C0T1 - If you are updating on C0T0, C0T1 must be waiting for the update
to complete

But you can initiate the microcode update on C1T0 simultaneously.


>
> Thx.
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-28 19:16:40

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Tue, Aug 27, 2019 at 05:56:30PM -0700, Raj, Ashok wrote:
> > "Cloud customers have expressed discontent as services disappear for
> > a prolonged time. The restriction is that only one core (or only one
> > thread of a core in the case of an SMT system) goes through the update
> > while other cores (or respectively, SMT threads) are quiesced."
>
> the last line seems to imply that only one core can be updated at a time.

Only one core *is* being updated at a time now, before the parallel
loading patch. Look at the code. I'm talking about what the code does,
not what the requirement is.

Maybe it should not say "restriction" above but the sentence should
start with: "Currently, only one core... "

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-28 19:19:30

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Tue, Aug 27, 2019 at 05:24:07PM -0400, Boris Ostrovsky wrote:
> This was a bit too aggressive with changes to arch-specific code, only
> changes to __reload_late() would be needed.

Yeah, it is not that ugly but the moment the microcode engine is not
shared between the SMT threads anymore, this needs to go. And frankly,
if there's nothing else speaking against the current variant, I'd like
to keep it simple.

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-08-28 19:46:52

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/microcode: Update late microcode in parallel

On Wed, Aug 28, 2019 at 09:13:31PM +0200, Borislav Petkov wrote:
> On Tue, Aug 27, 2019 at 05:56:30PM -0700, Raj, Ashok wrote:
> > > "Cloud customers have expressed discontent as services disappear for
> > > a prolonged time. The restriction is that only one core (or only one
> > > thread of a core in the case of an SMT system) goes through the update
> > > while other cores (or respectively, SMT threads) are quiesced."
> >
> > the last line seems to imply that only one core can be updated at a time.
>
> Only one core *is* being updated at a time now, before the parallel
> loading patch. Look at the code. I'm talking about what the code does,
> not what the requirement is.

Crystal :-)

>
> Maybe it should not say "restriction" above but the sentence should
> start with: "Currently, only one core... "

That will help clear things up..

>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.

Subject: [tip: x86/microcode] x86/microcode/intel: Issue the revision updated message only on the BSP

The following commit has been merged into the x86/microcode branch of tip:

Commit-ID: 811ae8ba6dca6b91a3ceccf9d40b98818cc4f400
Gitweb: https://git.kernel.org/tip/811ae8ba6dca6b91a3ceccf9d40b98818cc4f400
Author: Borislav Petkov <[email protected]>
AuthorDate: Sat, 24 Aug 2019 10:01:53 +02:00
Committer: Borislav Petkov <[email protected]>
CommitterDate: Tue, 01 Oct 2019 16:06:35 +02:00

x86/microcode/intel: Issue the revision updated message only on the BSP

... in order to not pollute dmesg with a line for each updated microcode
engine.

Signed-off-by: Borislav Petkov <[email protected]>
Cc: Ashok Raj <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jon Grimm <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Mihai Carabas <[email protected]>
Cc: [email protected]
Cc: Thomas Gleixner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: x86-ml <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/microcode/intel.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce799cf..6a99535 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -791,6 +791,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
struct microcode_intel *mc;
enum ucode_state ret;
static int prev_rev;
@@ -836,7 +837,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_ERROR;
}

- if (rev != prev_rev) {
+ if (bsp && rev != prev_rev) {
pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
rev,
mc->hdr.date & 0xffff,
@@ -852,7 +853,7 @@ out:
c->microcode = rev;

/* Update boot_cpu_data's revision too, if we're on the BSP: */
- if (c->cpu_index == boot_cpu_data.cpu_index)
+ if (bsp)
boot_cpu_data.microcode = rev;

return ret;

Subject: [tip: x86/microcode] x86/microcode: Update late microcode in parallel

The following commit has been merged into the x86/microcode branch of tip:

Commit-ID: 93946a33b5693a6bbcf917a170198ff4afaa7a31
Gitweb: https://git.kernel.org/tip/93946a33b5693a6bbcf917a170198ff4afaa7a31
Author: Ashok Raj <[email protected]>
AuthorDate: Thu, 22 Aug 2019 23:43:47 +03:00
Committer: Borislav Petkov <[email protected]>
CommitterDate: Tue, 01 Oct 2019 15:58:54 +02:00

x86/microcode: Update late microcode in parallel

The microcode update was changed to be serialized because of restrictions
that came up after the Spectre days. Updating serially on a large
multi-socket system can be painful since it is done one CPU at a time.

Cloud customers have expressed discontent as services disappear for
a prolonged time. The restriction is that only one core (or only one
thread of a core in the case of an SMT system) goes through the update
while other cores (or respectively, SMT threads) are quiesced.

Do the microcode update only on the first thread of each core while
other siblings simply wait for this to complete.

[ bp: Simplify, massage, cleanup comments. ]

Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jon Grimm <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Thomas Gleixner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: x86-ml <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/microcode/core.c | 36 +++++++++++++++------------
1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index cb0fdca..7019d4b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -63,11 +63,6 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);

-/*
- * Serialize late loading so that CPUs get updated one-by-one.
- */
-static DEFINE_RAW_SPINLOCK(update_lock);
-
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
@@ -566,11 +561,18 @@ static int __reload_late(void *info)
if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
return -1;

- raw_spin_lock(&update_lock);
- apply_microcode_local(&err);
- raw_spin_unlock(&update_lock);
+ /*
+ * On an SMT system, it suffices to load the microcode on one sibling of
+ * the core because the microcode engine is shared between the threads.
+ * Synchronization still needs to take place so that no concurrent
+ * loading attempts happen on multiple threads of an SMT core. See
+ * below.
+ */
+ if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
+ apply_microcode_local(&err);
+ else
+ goto wait_for_siblings;

- /* siblings return UCODE_OK because their engine got updated already */
if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
ret = -1;
@@ -578,14 +580,18 @@ static int __reload_late(void *info)
ret = 1;
}

+wait_for_siblings:
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC))
+ panic("Timeout during microcode update!\n");
+
/*
- * Increase the wait timeout to a safe value here since we're
- * serializing the microcode update and that could take a while on a
- * large number of CPUs. And that is fine as the *actual* timeout will
- * be determined by the last CPU finished updating and thus cut short.
+ * At least one thread has completed the update on each core.
+ * For the others, simply call the update to make sure the
+ * per-cpu cpuinfo is updated with the right microcode
+ * revision.
*/
- if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
- panic("Timeout during microcode update!\n");
+ if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
+ apply_microcode_local(&err);

return ret;
}