2018-02-28 10:30:31

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 0/7] x86/microcode: Improve late loading

From: Borislav Petkov <[email protected]>

Hi,

here are a bunch of patches which improve microcode late loading.

Before you read any further: the early loading method is still the
preferred one and you should always do that. This patchset is improving
the late loading mechanism for long running jobs and cloud use cases -
i.e., use cases where early loading is, hm, a bit problematic.

Ashok Raj (4):
x86/microcode/intel: Check microcode revision before updating sibling
threads
x86/microcode/intel: Writeback and invalidate caches before updating
microcode
x86/microcode: Do not upload microcode if CPUs are offline
x86/microcode: Synchronize late microcode loading

Borislav Petkov (3):
x86/microcode: Get rid of struct apply_microcode_ctx
x86/microcode/intel: Look into the patch cache first
x86/microcode: Request microcode on the BSP

arch/x86/kernel/cpu/microcode/core.c | 158 +++++++++++++++++++++++++---------
arch/x86/kernel/cpu/microcode/intel.c | 48 +++++++++--
2 files changed, 159 insertions(+), 47 deletions(-)

--
2.13.0



2018-02-28 10:31:08

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 7/7] x86/microcode: Synchronize late microcode loading

From: Ashok Raj <[email protected]>

Original idea by Ashok, completely rewritten by Borislav.

Before you read any further: the early loading method is still the
preferred one and you should always do that. The following patch is
improving the late loading mechanism for long running jobs and cloud use
cases.

Gather all cores and serialize the microcode update on them by doing it
one-by-one to make the late update process as reliable as possible and
avoid potential issues caused by the microcode update.

Signed-off-by: Ashok Raj <[email protected]>
[Rewrite completely. ]
Co-developed-by: Borislav Petkov <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++--------
1 file changed, 92 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 5dd157d48606..70ecbc8099c9 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
#define pr_fmt(fmt) "microcode: " fmt

#include <linux/platform_device.h>
+#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
+#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>

@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);

+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
@@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ * is loading microcode in order to avoid any negative interactions caused by
+ * the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ * requirement can be relaxed in the future. Right now, this is conservative
+ * and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
static int check_online_cpus(void)
{
if (num_online_cpus() == num_present_cpus())
@@ -496,23 +517,85 @@ static int check_online_cpus(void)
return -EINVAL;
}

-static enum ucode_state reload_for_cpu(int cpu)
+static atomic_t late_cpus;
+
+/*
+ * Returns:
+ * < 0 - on error
+ * 0 - no update done
+ * 1 - microcode was updated
+ */
+static int __reload_late(void *info)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ unsigned int timeout = NSEC_PER_SEC;
+ int all_cpus = num_online_cpus();
+ int cpu = smp_processor_id();
+ enum ucode_state err;
+ int ret = 0;

- if (!uci->valid)
- return UCODE_OK;
+ atomic_dec(&late_cpus);
+
+ /*
+ * Wait for all CPUs to arrive. A load will not be attempted unless all
+ * CPUs show up.
+ * */
+ while (atomic_read(&late_cpus)) {
+ if (timeout < SPINUNIT) {
+ pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+ atomic_read(&late_cpus));
+ return -1;
+ }
+
+ ndelay(SPINUNIT);
+ timeout -= SPINUNIT;
+
+ touch_nmi_watchdog();
+ }
+
+ spin_lock(&update_lock);
+ apply_microcode_local(&err);
+ spin_unlock(&update_lock);
+
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ ret = -1;
+ } else if (err == UCODE_UPDATED) {
+ ret = 1;
+ }

- return apply_microcode_on_target(cpu);
+ atomic_inc(&late_cpus);
+
+ while (atomic_read(&late_cpus) != all_cpus)
+ cpu_relax();
+
+ return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+ int ret;
+
+ atomic_set(&late_cpus, num_online_cpus());
+
+ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ microcode_check();
+
+ return ret;
}

static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
- int cpu, bsp = boot_cpu_data.cpu_index;
enum ucode_state tmp_ret = UCODE_OK;
- bool do_callback = false;
+ int bsp = boot_cpu_data.cpu_index;
unsigned long val;
ssize_t ret = 0;

@@ -534,30 +617,13 @@ static ssize_t reload_store(struct device *dev,
goto put;

mutex_lock(&microcode_mutex);
-
- for_each_online_cpu(cpu) {
- tmp_ret = reload_for_cpu(cpu);
- if (tmp_ret > UCODE_NFOUND) {
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
- /* set retval for the first encountered reload error */
- if (!ret)
- ret = -EINVAL;
- }
-
- if (tmp_ret == UCODE_UPDATED)
- do_callback = true;
- }
-
- if (!ret && do_callback)
- microcode_check();
-
+ ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);

put:
put_online_cpus();

- if (!ret)
+ if (ret >= 0)
ret = size;

return ret;
--
2.13.0


2018-02-28 10:31:16

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 1/7] x86/microcode: Get rid of struct apply_microcode_ctx

From: Borislav Petkov <[email protected]>

It is a useless remnant from earlier times. Use the ucode_state enum
directly.

No functionality change.

Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/core.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index aa1b9a422f2b..63370651e376 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -373,26 +373,23 @@ static int collect_cpu_info(int cpu)
return ret;
}

-struct apply_microcode_ctx {
- enum ucode_state err;
-};
-
static void apply_microcode_local(void *arg)
{
- struct apply_microcode_ctx *ctx = arg;
+ enum ucode_state *err = arg;

- ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+ *err = microcode_ops->apply_microcode(smp_processor_id());
}

static int apply_microcode_on_target(int cpu)
{
- struct apply_microcode_ctx ctx = { .err = 0 };
+ enum ucode_state err;
int ret;

- ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
- if (!ret)
- ret = ctx.err;
-
+ ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+ if (!ret) {
+ if (err == UCODE_ERROR)
+ ret = 1;
+ }
return ret;
}

--
2.13.0


2018-02-28 10:31:30

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 2/7] x86/microcode/intel: Check microcode revision before updating sibling threads

From: Ashok Raj <[email protected]>

After updating microcode on one of the threads of a core, the other
thread sibling automatically gets the update since the microcode
resources on a hyperthreaded core are shared between the two threads.

Check the microcode revision on the CPU before performing a microcode
update and thus save us the WRMSR 0x79 because it is a particularly
expensive operation.

Signed-off-by: Ashok Raj <[email protected]>
Cc: X86 ML <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Massage it. ]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/intel.c | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 923054a6b760..87bd6dc94081 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -589,6 +589,17 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (!mc)
return 0;

+ /*
+ * Save us the MSR write below - which is a particular expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ return UCODE_OK;
+ }
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

@@ -776,7 +787,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
{
struct microcode_intel *mc;
struct ucode_cpu_info *uci;
- struct cpuinfo_x86 *c;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
static int prev_rev;
u32 rev;

@@ -793,6 +804,18 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_NFOUND;
}

+ /*
+ * Save us the MSR write below - which is a particular expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ return UCODE_OK;
+ }
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

@@ -813,8 +836,6 @@ static enum ucode_state apply_microcode_intel(int cpu)
prev_rev = rev;
}

- c = &cpu_data(cpu);
-
uci->cpu_sig.rev = rev;
c->microcode = rev;

--
2.13.0


2018-02-28 10:31:50

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 5/7] x86/microcode/intel: Look into the patch cache first

From: Borislav Petkov <[email protected]>

The cache might contain a newer patch - look in there first.

A follow-on change will make sure newest patches are loaded into the
cache of microcode patches.

Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/intel.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index e2864bc2d575..2aded9db1d42 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -791,9 +791,9 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)

static enum ucode_state apply_microcode_intel(int cpu)
{
- struct microcode_intel *mc;
- struct ucode_cpu_info *uci;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct microcode_intel *mc;
static int prev_rev;
u32 rev;

@@ -801,11 +801,10 @@ static enum ucode_state apply_microcode_intel(int cpu)
if (WARN_ON(raw_smp_processor_id() != cpu))
return UCODE_ERROR;

- uci = ucode_cpu_info + cpu;
- mc = uci->mc;
+ /* Look for a newer patch in our cache: */
+ mc = find_patch(uci);
if (!mc) {
- /* Look for a newer patch in our cache: */
- mc = find_patch(uci);
+ mc = uci->mc;
if (!mc)
return UCODE_NFOUND;
}
--
2.13.0


2018-02-28 10:32:06

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 6/7] x86/microcode: Request microcode on the BSP

From: Borislav Petkov <[email protected]>

... so that any newer version can land in the cache and can later be
fished out by the application functions. Do that before grabbing the
hotplug lock.

Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/core.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index fa32cb3dcca5..5dd157d48606 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -499,15 +499,10 @@ static int check_online_cpus(void)
static enum ucode_state reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- enum ucode_state ustate;

if (!uci->valid)
return UCODE_OK;

- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
- if (ustate != UCODE_OK)
- return ustate;
-
return apply_microcode_on_target(cpu);
}

@@ -515,11 +510,11 @@ static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ int cpu, bsp = boot_cpu_data.cpu_index;
enum ucode_state tmp_ret = UCODE_OK;
bool do_callback = false;
unsigned long val;
ssize_t ret = 0;
- int cpu;

ret = kstrtoul(buf, 0, &val);
if (ret)
@@ -528,6 +523,10 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;

+ tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+ if (tmp_ret != UCODE_OK)
+ return size;
+
get_online_cpus();

ret = check_online_cpus();
--
2.13.0


2018-02-28 10:32:40

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 3/7] x86/microcode/intel: Writeback and invalidate caches before updating microcode

From: Ashok Raj <[email protected]>

Updating microcode is less error prone when caches have been flushed and
depending on what exactly the microcode is updating. For example, some
of the issues around certain Broadwell parts can be addressed by doing a
full cache flush.

Signed-off-by: Ashok Raj <[email protected]>
Cc: X86 ML <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Massage it and use native_wbinvd() in both cases. ]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/intel.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 87bd6dc94081..e2864bc2d575 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -600,6 +600,12 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
return UCODE_OK;
}

+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

@@ -816,6 +822,12 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_OK;
}

+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

--
2.13.0


2018-02-28 10:33:12

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 4/7] x86/microcode: Do not upload microcode if CPUs are offline

From: Ashok Raj <[email protected]>

Avoid loading microcode if any of the CPUs are offline, and issue a
warning. Having different microcode revisions on the system at any time
is outright dangerous.

Signed-off-by: Ashok Raj <[email protected]>
Cc: x86-ml <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Massage it. ]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 63370651e376..fa32cb3dcca5 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -486,6 +486,16 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

+static int check_online_cpus(void)
+{
+ if (num_online_cpus() == num_present_cpus())
+ return 0;
+
+ pr_err("Not all CPUs online, aborting microcode update.\n");
+
+ return -EINVAL;
+}
+
static enum ucode_state reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -519,7 +529,13 @@ static ssize_t reload_store(struct device *dev,
return size;

get_online_cpus();
+
+ ret = check_online_cpus();
+ if (ret)
+ goto put;
+
mutex_lock(&microcode_mutex);
+
for_each_online_cpu(cpu) {
tmp_ret = reload_for_cpu(cpu);
if (tmp_ret > UCODE_NFOUND) {
@@ -538,6 +554,8 @@ static ssize_t reload_store(struct device *dev,
microcode_check();

mutex_unlock(&microcode_mutex);
+
+put:
put_online_cpus();

if (!ret)
--
2.13.0


Subject: Re: [PATCH 4/7] x86/microcode: Do not upload microcode if CPUs are offline

On Wed, 28 Feb 2018, Borislav Petkov wrote:
> Avoid loading microcode if any of the CPUs are offline, and issue a
> warning. Having different microcode revisions on the system at any time
> is outright dangerous.

Even if we update that microcode during CPU early bring-up, before we
mark it on-line and start using it?

AFAIK, late-loading or not, this is what should happen in the current
code: APs that are brought up after a microcode update is loaded (either
by the early or late driver, it doesn't matter) will be always
*early-updated* to the new microcode.

Is it dangerous to have an offline core at an older microcode revision
than the online cores?

I am not against the patch, mind you, but I am curious about why it is
supposed to be dangerous if we're updating the CPUs before we start
using them *anyway*.

Also, if this is really dangerous, does it means safe CPU hotplug isn't
possible? AFAICT, the firmware would have to do it for us, but it
*doesn't* have the up-to-date microcode (*we* had to update it)...

--
Henrique Holschuh

2018-02-28 13:27:57

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 4/7] x86/microcode: Do not upload microcode if CPUs are offline

On Wed, Feb 28, 2018 at 10:11:56AM -0300, Henrique de Moraes Holschuh wrote:
> > Avoid loading microcode if any of the CPUs are offline, and issue a
> > warning. Having different microcode revisions on the system at any time
> > is outright dangerous.
>
> Even if we update that microcode during CPU early bring-up, before we
> mark it on-line and start using it?
>
> AFAIK, late-loading or not, this is what should happen in the current
> code: APs that are brought up after a microcode update is loaded (either
> by the early or late driver, it doesn't matter) will be always
> *early-updated* to the new microcode.
>
> Is it dangerous to have an offline core at an older microcode revision
> than the online cores?

We don't want to leave a system and allow the user to update microcode
when some cpus are offline. Remember cpu_offline in linux is only
logical offlining.. so the cpu is still in the system.. it can even
participate in MCE for e.g. It is very much alive. Its not a question
that "Would it not work" but its not worth to leave an open door and
being paranoid is best!



>
> I am not against the patch, mind you, but I am curious about why it is
> supposed to be dangerous if we're updating the CPUs before we start
> using them *anyway*.
>
> Also, if this is really dangerous, does it means safe CPU hotplug isn't
> possible? AFAICT, the firmware would have to do it for us, but it
> *doesn't* have the up-to-date microcode (*we* had to update it)...

The difference is hot-adding you know you are going to update the current
microcode. But leaving a cpu in offline state is leaving it stale for a long
time without realizing that you have some stale cores.


Subject: Re: [PATCH 7/7] x86/microcode: Synchronize late microcode loading

On Wed, 28 Feb 2018, Borislav Petkov wrote:
> + * Late loading dance. Why the heavy-handed stomp_machine effort?
> + *
> + * - HT siblings must be idle and not execute other code while the other sibling
> + * is loading microcode in order to avoid any negative interactions caused by
> + * the loading.
> + *
> + * - In addition, microcode update on the cores must be serialized until this
> + * requirement can be relaxed in the future. Right now, this is conservative
> + * and good.

Eek! If I read that right, this effectively halts the entire box until
every core is updated, with one core entering deep-coma at a time (the
rest are left either spinning or cpu_relax()ing depending on whether
they have already updated or not)?

If this is correct, I shudder at what it would do on a server with
dozens, or hundreds of cores... According to Ben Hawkes' paper, Intel's
on-die microcode update loader takes linear time relative to the update
size to do the crypto dance.

On my single-xeon X5550 workstation, which should be relatively fast
since its microcode update is small, the whole thing would take about
3,2 million cycles (circa 800k cycles per core, 4 cores, skipping
hyperthreads) to do a sync late update. I don't believe this has
changed much, but I *did not* test, e.g., a Skylake Xeon, or anything
newer than that Xeon X5550.

Anyway, maybe there is a safe way to do it in a more parallel fashion
based on cpu topology?

AFAIK, it is not like there is any way to make OS microcode updates
(early or late) safe against SMIs and NMIs hitting the sibling
hyperthread while updating the other, so we don't have to care about
*that* nasty corner case simply because we can't avoid it in the first
place.

Hopefully AMD has none of those pitfalls, and could just trigger an
update on half the cores at a time, easily bounding it to approximately
twice the time it takes to update a single core :-(

--
Henrique Holschuh

2018-02-28 14:09:52

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 7/7] x86/microcode: Synchronize late microcode loading

On Wed, Feb 28, 2018 at 10:59:31AM -0300, Henrique de Moraes Holschuh wrote:
> Eek! If I read that right, this effectively halts the entire box until
> every core is updated, with one core entering deep-coma at a time (the
> rest are left either spinning or cpu_relax()ing

I think *you* should relax. :)

Late microcode loading on a long running box is not something you do
more than 2-3 times a year. And if the box needs to restart, it'll get
the early microcode.

And yes, this is addressing *late* loading, if you haven't noticed yet.
I added a big fat note *twice*:

"Before you read any further: the early loading method is still the
preferred one and you should always do that. This patchset is improving
the late loading mechanism for long running jobs and cloud use cases -
i.e., use cases where early loading is, hm, a bit problematic."

So keep doing the early method and you'll be fine.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Subject: Re: [PATCH 7/7] x86/microcode: Synchronize late microcode loading

On Wed, 28 Feb 2018, Borislav Petkov wrote:
> On Wed, Feb 28, 2018 at 10:59:31AM -0300, Henrique de Moraes Holschuh wrote:
> > Eek! If I read that right, this effectively halts the entire box until
> > every core is updated, with one core entering deep-coma at a time (the
> > rest are left either spinning or cpu_relax()ing
>
> I think *you* should relax. :)

Well, I don't expect any general-use distro to unleash late loading on
the users, certainly :-) Least of all, Debian... It is, nowadays, "use
it only if you know what you're doing" land.

But it is not yet sufficiently documented as such, I fear.

> Late microcode loading on a long running box is not something you do
> more than 2-3 times a year. And if the box needs to restart, it'll get
> the early microcode.

Sure, but the thing is so damn expensive (and the time it takes is
directly proportional to the number of cores, thus likely to hurt worse
exactly those who would want to use it), that I was left wondering if it
should not be optimized further to do the work in parallel (if that can
be made safe enough).

Besides, we likely don't want to have early microcode updates end up
being the reason AP bringup has to be serialized during boot either (and
it *is* likely to dominate the time taken for AP bringup, too!), so it
would be nice to have a way to make parallel microcode updates possible
in general... but I don't think we're there, yet.

No matter. I am not opposing the patch in the first place. And any
paralell microcode update work would be best done in an incremental
fashion, on top of working serial updates, anyway.

> And yes, this is addressing *late* loading, if you haven't noticed yet.

I did get that message, yes :)

> So keep doing the early method and you'll be fine.

We need that in the documentation :-P Microcode updates have always
been somewhat slow, but now they are potentially going to be *much* more
painful and noticeable in the late-update case...

--
Henrique Holschuh

Subject: Re: [PATCH 4/7] x86/microcode: Do not upload microcode if CPUs are offline

On Wed, 28 Feb 2018, Raj, Ashok wrote:
> On Wed, Feb 28, 2018 at 10:11:56AM -0300, Henrique de Moraes Holschuh wrote:
> > > Avoid loading microcode if any of the CPUs are offline, and issue a
> > > warning. Having different microcode revisions on the system at any time
> > > is outright dangerous.
> >
> > Even if we update that microcode during CPU early bring-up, before we
> > mark it on-line and start using it?
> >
> > AFAIK, late-loading or not, this is what should happen in the current
> > code: APs that are brought up after a microcode update is loaded (either
> > by the early or late driver, it doesn't matter) will be always
> > *early-updated* to the new microcode.
> >
> > Is it dangerous to have an offline core at an older microcode revision
> > than the online cores?
>
> We don't want to leave a system and allow the user to update microcode
> when some cpus are offline. Remember cpu_offline in linux is only
> logical offlining.. so the cpu is still in the system.. it can even
> participate in MCE for e.g. It is very much alive. Its not a question
> that "Would it not work" but its not worth to leave an open door and
> being paranoid is best!

I see. Thanks for the explanation!

> > I am not against the patch, mind you, but I am curious about why it is
> > supposed to be dangerous if we're updating the CPUs before we start
> > using them *anyway*.
> >
> > Also, if this is really dangerous, does it means safe CPU hotplug isn't
> > possible? AFAICT, the firmware would have to do it for us, but it
> > *doesn't* have the up-to-date microcode (*we* had to update it)...
>
> The difference is hot-adding you know you are going to update the current
> microcode. But leaving a cpu in offline state is leaving it stale for a long
> time without realizing that you have some stale cores.

That begs the question: do we have any reasons to not update the
microcode even the offline cores?

--
Henrique Holschuh

2018-03-05 22:08:06

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 4/7] x86/microcode: Do not upload microcode if CPUs are offline

On 2/28/2018 4:28 AM, Borislav Petkov wrote:
> From: Ashok Raj <[email protected]>
>
> Avoid loading microcode if any of the CPUs are offline, and issue a
> warning. Having different microcode revisions on the system at any time
> is outright dangerous.
>
> Signed-off-by: Ashok Raj <[email protected]>
> Cc: x86-ml <[email protected]>
> Link: http://lkml.kernel.org/r/[email protected]
> [ Massage it. ]
> Signed-off-by: Borislav Petkov <[email protected]>
> ---
> arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++
> 1 file changed, 18 insertions(+)
>

Reviewed-by: Tom Lendacky <[email protected]>

> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index 63370651e376..fa32cb3dcca5 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -486,6 +486,16 @@ static void __exit microcode_dev_exit(void)
> /* fake device for request_firmware */
> static struct platform_device *microcode_pdev;
>
> +static int check_online_cpus(void)
> +{
> + if (num_online_cpus() == num_present_cpus())
> + return 0;
> +
> + pr_err("Not all CPUs online, aborting microcode update.\n");
> +
> + return -EINVAL;
> +}
> +
> static enum ucode_state reload_for_cpu(int cpu)
> {
> struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
> @@ -519,7 +529,13 @@ static ssize_t reload_store(struct device *dev,
> return size;
>
> get_online_cpus();
> +
> + ret = check_online_cpus();
> + if (ret)
> + goto put;
> +
> mutex_lock(&microcode_mutex);
> +
> for_each_online_cpu(cpu) {
> tmp_ret = reload_for_cpu(cpu);
> if (tmp_ret > UCODE_NFOUND) {
> @@ -538,6 +554,8 @@ static ssize_t reload_store(struct device *dev,
> microcode_check();
>
> mutex_unlock(&microcode_mutex);
> +
> +put:
> put_online_cpus();
>
> if (!ret)
>

2018-03-05 22:09:41

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 6/7] x86/microcode: Request microcode on the BSP

On 2/28/2018 4:28 AM, Borislav Petkov wrote:
> From: Borislav Petkov <[email protected]>
>
> ... so that any newer version can land in the cache and can later be
> fished out by the application functions. Do that before grabbing the
> hotplug lock.
>
> Signed-off-by: Borislav Petkov <[email protected]>
> ---
> arch/x86/kernel/cpu/microcode/core.c | 11 +++++------
> 1 file changed, 5 insertions(+), 6 deletions(-)
>

Reviewed-by: Tom Lendacky <[email protected]>

> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index fa32cb3dcca5..5dd157d48606 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -499,15 +499,10 @@ static int check_online_cpus(void)
> static enum ucode_state reload_for_cpu(int cpu)
> {
> struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
> - enum ucode_state ustate;
>
> if (!uci->valid)
> return UCODE_OK;
>
> - ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
> - if (ustate != UCODE_OK)
> - return ustate;
> -
> return apply_microcode_on_target(cpu);
> }
>
> @@ -515,11 +510,11 @@ static ssize_t reload_store(struct device *dev,
> struct device_attribute *attr,
> const char *buf, size_t size)
> {
> + int cpu, bsp = boot_cpu_data.cpu_index;
> enum ucode_state tmp_ret = UCODE_OK;
> bool do_callback = false;
> unsigned long val;
> ssize_t ret = 0;
> - int cpu;
>
> ret = kstrtoul(buf, 0, &val);
> if (ret)
> @@ -528,6 +523,10 @@ static ssize_t reload_store(struct device *dev,
> if (val != 1)
> return size;
>
> + tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
> + if (tmp_ret != UCODE_OK)
> + return size;
> +
> get_online_cpus();
>
> ret = check_online_cpus();
>

2018-03-05 22:10:53

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 7/7] x86/microcode: Synchronize late microcode loading

On 2/28/2018 4:28 AM, Borislav Petkov wrote:
> From: Ashok Raj <[email protected]>
>
> Original idea by Ashok, completely rewritten by Borislav.
>
> Before you read any further: the early loading method is still the
> preferred one and you should always do that. The following patch is
> improving the late loading mechanism for long running jobs and cloud use
> cases.
>
> Gather all cores and serialize the microcode update on them by doing it
> one-by-one to make the late update process as reliable as possible and
> avoid potential issues caused by the microcode update.
>
> Signed-off-by: Ashok Raj <[email protected]>
> [Rewrite completely. ]
> Co-developed-by: Borislav Petkov <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> ---
> arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++--------
> 1 file changed, 92 insertions(+), 26 deletions(-)
>

Reviewed-by: Tom Lendacky <[email protected]>


2018-03-05 22:15:08

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 0/7] x86/microcode: Improve late loading

On 2/28/2018 4:28 AM, Borislav Petkov wrote:
> From: Borislav Petkov <[email protected]>
>
> Hi,
>
> here are a bunch of patches which improve microcode late loading.
>
> Before you read any further: the early loading method is still the
> preferred one and you should always do that. This patchset is improving
> the late loading mechanism for long running jobs and cloud use cases -
> i.e., use cases where early loading is, hm, a bit problematic.
>
> Ashok Raj (4):
> x86/microcode/intel: Check microcode revision before updating sibling
> threads
> x86/microcode/intel: Writeback and invalidate caches before updating
> microcode
> x86/microcode: Do not upload microcode if CPUs are offline
> x86/microcode: Synchronize late microcode loading
>
> Borislav Petkov (3):
> x86/microcode: Get rid of struct apply_microcode_ctx
> x86/microcode/intel: Look into the patch cache first
> x86/microcode: Request microcode on the BSP
>
> arch/x86/kernel/cpu/microcode/core.c | 158 +++++++++++++++++++++++++---------
> arch/x86/kernel/cpu/microcode/intel.c | 48 +++++++++--
> 2 files changed, 159 insertions(+), 47 deletions(-)
>

Tested on my Family 17h (EPYC) system (including taking CPUs offline and
back on before and after reloads). I'll try and find some additional
systems to test on, but for now:

Tested-by: Tom Lendacky <[email protected]>

2018-03-05 23:53:17

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 0/7] x86/microcode: Improve late loading

Thanks Tom

On Mon, 2018-03-05 at 16:12 -0600, Tom Lendacky wrote:
> Borislav Petkov (3):
> > x86/microcode: Get rid of struct apply_microcode_ctx
> > x86/microcode/intel: Look into the patch cache first
> > x86/microcode: Request microcode on the BSP
> >
> > arch/x86/kernel/cpu/microcode/core.c | 158
> > +++++++++++++++++++++++++---------
> > arch/x86/kernel/cpu/microcode/intel.c | 48 +++++++++--
> > 2 files changed, 159 insertions(+), 47 deletions(-)
> >
>
> Tested on my Family 17h (EPYC) system (including taking CPUs offline
> and
> back on before and after reloads). I'll try and find some additional
> systems to test on, but for now:
>
> Tested-by: Tom Lendacky <[email protected]>

Tested on variety of Intel systems.

Tested-by: Ashok Raj <[email protected]>


Attachments:
smime.p7s (3.17 kB)
Subject: [tip:x86/pti] x86/microcode: Get rid of struct apply_microcode_ctx

Commit-ID: 854857f5944c59a881ff607b37ed9ed41d031a3b
Gitweb: https://git.kernel.org/tip/854857f5944c59a881ff607b37ed9ed41d031a3b
Author: Borislav Petkov <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:40 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:25 +0100

x86/microcode: Get rid of struct apply_microcode_ctx

It is a useless remnant from earlier times. Use the ucode_state enum
directly.

No functional change.

Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/core.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index aa1b9a422f2b..63370651e376 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -373,26 +373,23 @@ static int collect_cpu_info(int cpu)
return ret;
}

-struct apply_microcode_ctx {
- enum ucode_state err;
-};
-
static void apply_microcode_local(void *arg)
{
- struct apply_microcode_ctx *ctx = arg;
+ enum ucode_state *err = arg;

- ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+ *err = microcode_ops->apply_microcode(smp_processor_id());
}

static int apply_microcode_on_target(int cpu)
{
- struct apply_microcode_ctx ctx = { .err = 0 };
+ enum ucode_state err;
int ret;

- ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
- if (!ret)
- ret = ctx.err;
-
+ ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+ if (!ret) {
+ if (err == UCODE_ERROR)
+ ret = 1;
+ }
return ret;
}


Subject: [tip:x86/pti] x86/microcode/intel: Check microcode revision before updating sibling threads

Commit-ID: c182d2b7d0ca48e0d6ff16f7d883161238c447ed
Gitweb: https://git.kernel.org/tip/c182d2b7d0ca48e0d6ff16f7d883161238c447ed
Author: Ashok Raj <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:41 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:25 +0100

x86/microcode/intel: Check microcode revision before updating sibling threads

After updating microcode on one of the threads of a core, the other
thread sibling automatically gets the update since the microcode
resources on a hyperthreaded core are shared between the two threads.

Check the microcode revision on the CPU before performing a microcode
update and thus save us the WRMSR 0x79 because it is a particularly
expensive operation.

[ Borislav: Massage changelog and coding style. ]

Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/intel.c | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 923054a6b760..87bd6dc94081 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -589,6 +589,17 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (!mc)
return 0;

+ /*
+ * Save us the MSR write below - which is a particular expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ return UCODE_OK;
+ }
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

@@ -776,7 +787,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
{
struct microcode_intel *mc;
struct ucode_cpu_info *uci;
- struct cpuinfo_x86 *c;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
static int prev_rev;
u32 rev;

@@ -793,6 +804,18 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_NFOUND;
}

+ /*
+ * Save us the MSR write below - which is a particular expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ return UCODE_OK;
+ }
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

@@ -813,8 +836,6 @@ static enum ucode_state apply_microcode_intel(int cpu)
prev_rev = rev;
}

- c = &cpu_data(cpu);
-
uci->cpu_sig.rev = rev;
c->microcode = rev;


Subject: [tip:x86/pti] x86/microcode/intel: Writeback and invalidate caches before updating microcode

Commit-ID: 91df9fdf51492aec9fed6b4cbd33160886740f47
Gitweb: https://git.kernel.org/tip/91df9fdf51492aec9fed6b4cbd33160886740f47
Author: Ashok Raj <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:42 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:25 +0100

x86/microcode/intel: Writeback and invalidate caches before updating microcode

Updating microcode is less error prone when caches have been flushed and
depending on what exactly the microcode is updating. For example, some
of the issues around certain Broadwell parts can be addressed by doing a
full cache flush.

[ Borislav: Massage it and use native_wbinvd() in both cases. ]

Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/intel.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 87bd6dc94081..e2864bc2d575 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -600,6 +600,12 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
return UCODE_OK;
}

+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);

@@ -816,6 +822,12 @@ static enum ucode_state apply_microcode_intel(int cpu)
return UCODE_OK;
}

+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);


Subject: [tip:x86/pti] x86/microcode: Do not upload microcode if CPUs are offline

Commit-ID: 30ec26da9967d0d785abc24073129a34c3211777
Gitweb: https://git.kernel.org/tip/30ec26da9967d0d785abc24073129a34c3211777
Author: Ashok Raj <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:43 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:26 +0100

x86/microcode: Do not upload microcode if CPUs are offline

Avoid loading microcode if any of the CPUs are offline, and issue a
warning. Having different microcode revisions on the system at any time
is outright dangerous.

[ Borislav: Massage changelog. ]

Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 63370651e376..fa32cb3dcca5 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -486,6 +486,16 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

+static int check_online_cpus(void)
+{
+ if (num_online_cpus() == num_present_cpus())
+ return 0;
+
+ pr_err("Not all CPUs online, aborting microcode update.\n");
+
+ return -EINVAL;
+}
+
static enum ucode_state reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -519,7 +529,13 @@ static ssize_t reload_store(struct device *dev,
return size;

get_online_cpus();
+
+ ret = check_online_cpus();
+ if (ret)
+ goto put;
+
mutex_lock(&microcode_mutex);
+
for_each_online_cpu(cpu) {
tmp_ret = reload_for_cpu(cpu);
if (tmp_ret > UCODE_NFOUND) {
@@ -538,6 +554,8 @@ static ssize_t reload_store(struct device *dev,
microcode_check();

mutex_unlock(&microcode_mutex);
+
+put:
put_online_cpus();

if (!ret)

Subject: [tip:x86/pti] x86/microcode/intel: Look into the patch cache first

Commit-ID: d8c3b52c00a05036e0a6b315b4b17921a7b67997
Gitweb: https://git.kernel.org/tip/d8c3b52c00a05036e0a6b315b4b17921a7b67997
Author: Borislav Petkov <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:44 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:26 +0100

x86/microcode/intel: Look into the patch cache first

The cache might contain a newer patch - look in there first.

A follow-on change will make sure newest patches are loaded into the
cache of microcode patches.

Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/intel.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index e2864bc2d575..2aded9db1d42 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -791,9 +791,9 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)

static enum ucode_state apply_microcode_intel(int cpu)
{
- struct microcode_intel *mc;
- struct ucode_cpu_info *uci;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct microcode_intel *mc;
static int prev_rev;
u32 rev;

@@ -801,11 +801,10 @@ static enum ucode_state apply_microcode_intel(int cpu)
if (WARN_ON(raw_smp_processor_id() != cpu))
return UCODE_ERROR;

- uci = ucode_cpu_info + cpu;
- mc = uci->mc;
+ /* Look for a newer patch in our cache: */
+ mc = find_patch(uci);
if (!mc) {
- /* Look for a newer patch in our cache: */
- mc = find_patch(uci);
+ mc = uci->mc;
if (!mc)
return UCODE_NFOUND;
}

Subject: [tip:x86/pti] x86/microcode: Request microcode on the BSP

Commit-ID: cfb52a5a09c8ae3a1dafb44ce549fde5b69e8117
Gitweb: https://git.kernel.org/tip/cfb52a5a09c8ae3a1dafb44ce549fde5b69e8117
Author: Borislav Petkov <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:45 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:26 +0100

x86/microcode: Request microcode on the BSP

... so that any newer version can land in the cache and can later be
fished out by the application functions. Do that before grabbing the
hotplug lock.

Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/core.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index fa32cb3dcca5..5dd157d48606 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -499,15 +499,10 @@ static int check_online_cpus(void)
static enum ucode_state reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- enum ucode_state ustate;

if (!uci->valid)
return UCODE_OK;

- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
- if (ustate != UCODE_OK)
- return ustate;
-
return apply_microcode_on_target(cpu);
}

@@ -515,11 +510,11 @@ static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ int cpu, bsp = boot_cpu_data.cpu_index;
enum ucode_state tmp_ret = UCODE_OK;
bool do_callback = false;
unsigned long val;
ssize_t ret = 0;
- int cpu;

ret = kstrtoul(buf, 0, &val);
if (ret)
@@ -528,6 +523,10 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;

+ tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+ if (tmp_ret != UCODE_OK)
+ return size;
+
get_online_cpus();

ret = check_online_cpus();

Subject: [tip:x86/pti] x86/microcode: Synchronize late microcode loading

Commit-ID: a5321aec6412b20b5ad15db2d6b916c05349dbff
Gitweb: https://git.kernel.org/tip/a5321aec6412b20b5ad15db2d6b916c05349dbff
Author: Ashok Raj <[email protected]>
AuthorDate: Wed, 28 Feb 2018 11:28:46 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 10:19:26 +0100

x86/microcode: Synchronize late microcode loading

Original idea by Ashok, completely rewritten by Borislav.

Before you read any further: the early loading method is still the
preferred one and you should always do that. The following patch is
improving the late loading mechanism for long running jobs and cloud use
cases.

Gather all cores and serialize the microcode update on them by doing it
one-by-one to make the late update process as reliable as possible and
avoid potential issues caused by the microcode update.

[ Borislav: Rewrite completely. ]

Co-developed-by: Borislav Petkov <[email protected]>
Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++--------
1 file changed, 92 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 5dd157d48606..70ecbc8099c9 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
#define pr_fmt(fmt) "microcode: " fmt

#include <linux/platform_device.h>
+#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
+#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>

@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);

+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
@@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ * is loading microcode in order to avoid any negative interactions caused by
+ * the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ * requirement can be relaxed in the future. Right now, this is conservative
+ * and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
static int check_online_cpus(void)
{
if (num_online_cpus() == num_present_cpus())
@@ -496,23 +517,85 @@ static int check_online_cpus(void)
return -EINVAL;
}

-static enum ucode_state reload_for_cpu(int cpu)
+static atomic_t late_cpus;
+
+/*
+ * Returns:
+ * < 0 - on error
+ * 0 - no update done
+ * 1 - microcode was updated
+ */
+static int __reload_late(void *info)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ unsigned int timeout = NSEC_PER_SEC;
+ int all_cpus = num_online_cpus();
+ int cpu = smp_processor_id();
+ enum ucode_state err;
+ int ret = 0;

- if (!uci->valid)
- return UCODE_OK;
+ atomic_dec(&late_cpus);
+
+ /*
+ * Wait for all CPUs to arrive. A load will not be attempted unless all
+ * CPUs show up.
+ * */
+ while (atomic_read(&late_cpus)) {
+ if (timeout < SPINUNIT) {
+ pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+ atomic_read(&late_cpus));
+ return -1;
+ }
+
+ ndelay(SPINUNIT);
+ timeout -= SPINUNIT;
+
+ touch_nmi_watchdog();
+ }
+
+ spin_lock(&update_lock);
+ apply_microcode_local(&err);
+ spin_unlock(&update_lock);
+
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ ret = -1;
+ } else if (err == UCODE_UPDATED) {
+ ret = 1;
+ }

- return apply_microcode_on_target(cpu);
+ atomic_inc(&late_cpus);
+
+ while (atomic_read(&late_cpus) != all_cpus)
+ cpu_relax();
+
+ return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+ int ret;
+
+ atomic_set(&late_cpus, num_online_cpus());
+
+ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ microcode_check();
+
+ return ret;
}

static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
- int cpu, bsp = boot_cpu_data.cpu_index;
enum ucode_state tmp_ret = UCODE_OK;
- bool do_callback = false;
+ int bsp = boot_cpu_data.cpu_index;
unsigned long val;
ssize_t ret = 0;

@@ -534,30 +617,13 @@ static ssize_t reload_store(struct device *dev,
goto put;

mutex_lock(&microcode_mutex);
-
- for_each_online_cpu(cpu) {
- tmp_ret = reload_for_cpu(cpu);
- if (tmp_ret > UCODE_NFOUND) {
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
- /* set retval for the first encountered reload error */
- if (!ret)
- ret = -EINVAL;
- }
-
- if (tmp_ret == UCODE_UPDATED)
- do_callback = true;
- }
-
- if (!ret && do_callback)
- microcode_check();
-
+ ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);

put:
put_online_cpus();

- if (!ret)
+ if (ret >= 0)
ret = size;

return ret;