I am Seunghun Han and a senior security researcher at National Security
Research Institute of South Korea.
I found a security issue which can make kernel panic in userspace. After
analyzing the issue carefully, I found that MCE driver in the kernel has a
problem which can be occurred in SMP environment.
The check_interval file in
/sys/devices/system/machinecheck/machinecheck<cpu number> directory is a
global timer value for MCE polling. If it is changed by one CPU, MCE driver
in kernel calls mce_restart() function in store_int_with_restart() function
and broadcasts the event to other CPUs to delete and restart MCE polling
timer.
The __mcheck_cpu_init_timer() function which is called by mce_restart()
function initializes the mce_timer variable, and the "lock" in mce_timer is
also reinitialized. If more than one CPU write a specific value to
check_interval file concurrently, one can initialize the "lock" in mce_timer
while the others are handling "lock" in mce_timer. This problem causes some
synchronization errors such as kernel panic and kernel hang. Other functions
such as set_ignore_ce(), set_cmci_disabled(), and mce_enable_ce() also
have synchronization problems.
It could be a security problem because the attacker could make kernel panic
by writing a value to the check_interval file in userspace, and it could be
used for Denial-of-Service (DoS) attack.
To fix this problem, I added a mce_sysfs_mutex to serialize requests for
timer and sysfs functions.
Signed-off-by: Seunghun Han <[email protected]>
---
Changes since v2: add a mutex to sysfs functions according to review
result.
Changes since v1: add mce_sysfs_mutex according to review result.
arch/x86/kernel/cpu/mcheck/mce.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 706584681a4c..243f46a40efb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -55,6 +55,7 @@
#include "mce-internal.h"
static DEFINE_MUTEX(mce_log_mutex);
+static DEFINE_MUTEX(mce_sysfs_mutex);
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -2045,8 +2046,11 @@ static void mce_enable_ce(void *all)
return;
cmci_reenable();
cmci_recheck();
- if (all)
+ if (all) {
+ mutex_lock(&mce_sysfs_mutex);
__mcheck_cpu_init_timer();
+ mutex_unlock(&mce_sysfs_mutex);
+ }
}
static struct bus_type mce_subsys = {
@@ -2090,6 +2094,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2102,6 +2107,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2114,6 +2121,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2125,6 +2133,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2132,8 +2142,16 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ unsigned long old_check_interval = check_interval;
ssize_t ret = device_store_int(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}
--
2.16.2
On Sat, Mar 03, 2018 at 05:27:06AM +0900, Seunghun Han wrote:
> I am Seunghun Han and a senior security researcher at National Security
> Research Institute of South Korea.
>
> I found a security issue which can make kernel panic in userspace. After
> analyzing the issue carefully, I found that MCE driver in the kernel has a
> problem which can be occurred in SMP environment.
>
> The check_interval file in
> /sys/devices/system/machinecheck/machinecheck<cpu number> directory is a
> global timer value for MCE polling. If it is changed by one CPU, MCE driver
> in kernel calls mce_restart() function in store_int_with_restart() function
> and broadcasts the event to other CPUs to delete and restart MCE polling
> timer.
>
> The __mcheck_cpu_init_timer() function which is called by mce_restart()
> function initializes the mce_timer variable, and the "lock" in mce_timer is
> also reinitialized. If more than one CPU write a specific value to
> check_interval file concurrently, one can initialize the "lock" in mce_timer
> while the others are handling "lock" in mce_timer. This problem causes some
> synchronization errors such as kernel panic and kernel hang. Other functions
> such as set_ignore_ce(), set_cmci_disabled(), and mce_enable_ce() also
> have synchronization problems.
>
> It could be a security problem because the attacker could make kernel panic
> by writing a value to the check_interval file in userspace, and it could be
> used for Denial-of-Service (DoS) attack.
>
> To fix this problem, I added a mce_sysfs_mutex to serialize requests for
> timer and sysfs functions.
>
> Signed-off-by: Seunghun Han <[email protected]>
> ---
> Changes since v2: add a mutex to sysfs functions according to review
> result.
> Changes since v1: add mce_sysfs_mutex according to review result.
Thanks, I've committed the patch below. Scream if there's still
something not in order:
---
From: Seunghun Han <[email protected]>
Date: Sat, 3 Mar 2018 05:27:06 +0900
Subject: [PATCH] x86/MCE: Synchronize sysfs changes
The check_interval file in
/sys/devices/system/machinecheck/machinecheck<cpu number>
directory is a global timer value for MCE polling. If it is changed by
one CPU, mce_restart() broadcasts the event to other CPUs to delete
and restart the MCE polling timer and __mcheck_cpu_init_timer()
reinitializes the mce_timer variable.
If more than one CPU writes a specific value to the check_interval file
concurrently, mce_timer is not protected from such concurrent accesses
and all kinds of explosions happen.
Since only root can write to those sysfs variables, the issue is not a
big deal security-wise.
However, concurrent writes to these configuration variables is
void of reason so the proper thing to do is to "slow" accesses
down by synchronizing them with a mutex and thus take care of the
synchronization issue too.
Boris:
- make store_int_with_restart() use device_store_ulong() to filter out
negative intervals
- limit min interval to 1 second
- correct locking
- massage commit message
Signed-off-by: Seunghun Han <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: linux-edac <[email protected]>
Cc: x86-ml <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 181f6cf25895..21962c48dad7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -2104,6 +2107,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2116,6 +2120,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2128,6 +2134,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2139,6 +2146,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2146,8 +2155,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}
--
2.13.0
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On Tue, Mar 06, 2018 at 11:43:20AM +0100, Borislav Petkov wrote:
> On Sat, Mar 03, 2018 at 05:27:06AM +0900, Seunghun Han wrote:
> > I am Seunghun Han and a senior security researcher at National Security
> > Research Institute of South Korea.
> >
> > I found a security issue which can make kernel panic in userspace. After
> > analyzing the issue carefully, I found that MCE driver in the kernel has a
> > problem which can be occurred in SMP environment.
> >
> > The check_interval file in
> > /sys/devices/system/machinecheck/machinecheck<cpu number> directory is a
> > global timer value for MCE polling. If it is changed by one CPU, MCE driver
> > in kernel calls mce_restart() function in store_int_with_restart() function
> > and broadcasts the event to other CPUs to delete and restart MCE polling
> > timer.
> >
> > The __mcheck_cpu_init_timer() function which is called by mce_restart()
> > function initializes the mce_timer variable, and the "lock" in mce_timer is
> > also reinitialized. If more than one CPU write a specific value to
> > check_interval file concurrently, one can initialize the "lock" in mce_timer
> > while the others are handling "lock" in mce_timer. This problem causes some
> > synchronization errors such as kernel panic and kernel hang. Other functions
> > such as set_ignore_ce(), set_cmci_disabled(), and mce_enable_ce() also
> > have synchronization problems.
> >
> > It could be a security problem because the attacker could make kernel panic
> > by writing a value to the check_interval file in userspace, and it could be
> > used for Denial-of-Service (DoS) attack.
> >
> > To fix this problem, I added a mce_sysfs_mutex to serialize requests for
> > timer and sysfs functions.
> >
> > Signed-off-by: Seunghun Han <[email protected]>
> > ---
> > Changes since v2: add a mutex to sysfs functions according to review
> > result.
> > Changes since v1: add mce_sysfs_mutex according to review result.
>
> Thanks, I've committed the patch below. Scream if there's still
> something not in order:
It would have been nice to add a cc:stable for this, but I'll try to
watch it and when it hits Linus's tree I'll queue it up there.
thanks,
greg k-h
On Tue, Mar 06, 2018 at 02:57:48AM -0800, Greg Kroah-Hartman wrote:
> It would have been nice to add a cc:stable for this, but I'll try to
> watch it and when it hits Linus's tree I'll queue it up there.
I can still add it - it is the top patch and I haven't sent it yet.
/me goes and adds it.
Voila!
:-)
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On Tue, Mar 06, 2018 at 12:02:08PM +0100, Borislav Petkov wrote:
> On Tue, Mar 06, 2018 at 02:57:48AM -0800, Greg Kroah-Hartman wrote:
> > It would have been nice to add a cc:stable for this, but I'll try to
> > watch it and when it hits Linus's tree I'll queue it up there.
>
> I can still add it - it is the top patch and I haven't sent it yet.
>
> /me goes and adds it.
>
> Voila!
>
> :-)
Wonderful, thanks for doing that.
Commit-ID: b3b7c4795ccab5be71f080774c45bbbcc75c2aaf
Gitweb: https://git.kernel.org/tip/b3b7c4795ccab5be71f080774c45bbbcc75c2aaf
Author: Seunghun Han <[email protected]>
AuthorDate: Tue, 6 Mar 2018 15:21:43 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 8 Mar 2018 15:36:27 +0100
x86/MCE: Serialize sysfs changes
The check_interval file in
/sys/devices/system/machinecheck/machinecheck<cpu number>
directory is a global timer value for MCE polling. If it is changed by one
CPU, mce_restart() broadcasts the event to other CPUs to delete and restart
the MCE polling timer and __mcheck_cpu_init_timer() reinitializes the
mce_timer variable.
If more than one CPU writes a specific value to the check_interval file
concurrently, mce_timer is not protected from such concurrent accesses and
all kinds of explosions happen. Since only root can write to those sysfs
variables, the issue is not a big deal security-wise.
However, concurrent writes to these configuration variables is void of
reason so the proper thing to do is to serialize the access with a mutex.
Boris:
- Make store_int_with_restart() use device_store_ulong() to filter out
negative intervals
- Limit min interval to 1 second
- Correct locking
- Massage commit message
Signed-off-by: Seunghun Han <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: linux-edac <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/mcheck/mce.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3323cab9139..466f47301334 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -2088,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2100,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2112,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2123,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2130,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}