I am Seunghun Han and a senior security researcher at National Security
Research Institute of South Korea.
I found a security issue which can make kernel panic in userspace. After
analyzing the issue carefully, I found that MCE driver in the kernel has a
problem which can be occurred in SMP environment.
The check_interval file in
/sys/devices/system/machinecheck/machinecheck<cpu number> directory is a
global timer value for MCE polling. If it is changed by one CPU, MCE driver
in kernel calls mce_restart() function and broadcasts the event to other
CPUs to delete and restart MCE polling timer.
The __mcheck_cpu_init_timer() function which is called by mce_restart()
function initializes the mce_timer variable, and the "lock" in mce_timer is
also reinitialized. If more than one CPU write a specific value to
check_interval file concurrently, one can initialize the "lock" in mce_timer
while the others are handling "lock" in mce_timer. This problem causes some
synchronization errors such as kernel panic and kernel hang.
It is a security problem because the attacker can make kernel panic by
writing a value to the check_interval file in userspace, and it can be used
for Denial-of-Service (DoS) attack.
To fix this problem, I added a mce_sysfs_mutex to serialize requests.
Signed-off-by: Seunghun Han <[email protected]>
---
Changes since v1: add mce_sysfs_mutex according to review result.
arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 706584681a4c..bee0795a3b8c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -55,6 +55,7 @@
#include "mce-internal.h"
static DEFINE_MUTEX(mce_log_mutex);
+static DEFINE_MUTEX(mce_sysfs_mutex);
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -2045,8 +2046,11 @@ static void mce_enable_ce(void *all)
return;
cmci_reenable();
cmci_recheck();
- if (all)
+ if (all) {
+ mutex_lock(&mce_sysfs_mutex);
__mcheck_cpu_init_timer();
+ mutex_unlock(&mce_sysfs_mutex);
+ }
}
static struct bus_type mce_subsys = {
@@ -2132,8 +2136,14 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ unsigned long old_check_interval = check_interval;
ssize_t ret = device_store_int(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
return ret;
}
--
2.16.2
Hi, Borislav.
I made new patch according to your advice.
The patch is here, https://lkml.org/lkml/2018/2/28/1230.
If you have any advice about it, please let me know.
Best regards.
Seunghun.
2018-03-01 5:31 GMT+09:00 Seunghun Han <[email protected]>:
> I am Seunghun Han and a senior security researcher at National Security
> Research Institute of South Korea.
>
> I found a security issue which can make kernel panic in userspace. After
> analyzing the issue carefully, I found that MCE driver in the kernel has a
> problem which can be occurred in SMP environment.
>
> The check_interval file in
> /sys/devices/system/machinecheck/machinecheck<cpu number> directory is a
> global timer value for MCE polling. If it is changed by one CPU, MCE driver
> in kernel calls mce_restart() function and broadcasts the event to other
> CPUs to delete and restart MCE polling timer.
>
> The __mcheck_cpu_init_timer() function which is called by mce_restart()
> function initializes the mce_timer variable, and the "lock" in mce_timer is
> also reinitialized. If more than one CPU write a specific value to
> check_interval file concurrently, one can initialize the "lock" in mce_timer
> while the others are handling "lock" in mce_timer. This problem causes some
> synchronization errors such as kernel panic and kernel hang.
>
> It is a security problem because the attacker can make kernel panic by
> writing a value to the check_interval file in userspace, and it can be used
> for Denial-of-Service (DoS) attack.
>
> To fix this problem, I added a mce_sysfs_mutex to serialize requests.
>
> Signed-off-by: Seunghun Han <[email protected]>
> ---
> Changes since v1: add mce_sysfs_mutex according to review result.
>
> arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++++++++-
> 1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 706584681a4c..bee0795a3b8c 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -55,6 +55,7 @@
> #include "mce-internal.h"
>
> static DEFINE_MUTEX(mce_log_mutex);
> +static DEFINE_MUTEX(mce_sysfs_mutex);
>
> #define CREATE_TRACE_POINTS
> #include <trace/events/mce.h>
> @@ -2045,8 +2046,11 @@ static void mce_enable_ce(void *all)
> return;
> cmci_reenable();
> cmci_recheck();
> - if (all)
> + if (all) {
> + mutex_lock(&mce_sysfs_mutex);
> __mcheck_cpu_init_timer();
> + mutex_unlock(&mce_sysfs_mutex);
> + }
> }
>
> static struct bus_type mce_subsys = {
> @@ -2132,8 +2136,14 @@ static ssize_t store_int_with_restart(struct device *s,
> struct device_attribute *attr,
> const char *buf, size_t size)
> {
> + unsigned long old_check_interval = check_interval;
> ssize_t ret = device_store_int(s, attr, buf, size);
> +
> + if (check_interval == old_check_interval)
> + return ret;
> + mutex_lock(&mce_sysfs_mutex);
> mce_restart();
> + mutex_unlock(&mce_sysfs_mutex);
> return ret;
> }
>
> --
> 2.16.2
>
On Thu, Mar 01, 2018 at 05:31:31AM +0900, Seunghun Han wrote:
> Changes since v1: add mce_sysfs_mutex according to review result.
Ok, we're getting there.
> arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++++++++-
> 1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 706584681a4c..bee0795a3b8c 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -55,6 +55,7 @@
> #include "mce-internal.h"
>
> static DEFINE_MUTEX(mce_log_mutex);
> +static DEFINE_MUTEX(mce_sysfs_mutex);
>
> #define CREATE_TRACE_POINTS
> #include <trace/events/mce.h>
> @@ -2045,8 +2046,11 @@ static void mce_enable_ce(void *all)
> return;
> cmci_reenable();
> cmci_recheck();
> - if (all)
> + if (all) {
> + mutex_lock(&mce_sysfs_mutex);
> __mcheck_cpu_init_timer();
> + mutex_unlock(&mce_sysfs_mutex);
> + }
Instead of chasing all the places which need to grab the mutex, I was
thinking of grabbing it in all those functions which modify MCA behavior
non-trivially from sysfs. And those are:
store_int_with_restart()
set_ignore_ce()
set_cmci_disabled()
And you've done that already for store_int_with_restart() and I think
doing the same in the other two will address the issue nicely and
in the simplest way possible.
All those sysfs operations should be synchronized anyway. Please reflect
that in your commit message too.
The aspect about DOS is not really important because you need to be root
to write those so if you have root, there are gazillion ways to shoot
yourself in the foot already.
> static struct bus_type mce_subsys = {
> @@ -2132,8 +2136,14 @@ static ssize_t store_int_with_restart(struct device *s,
> struct device_attribute *attr,
> const char *buf, size_t size)
> {
> + unsigned long old_check_interval = check_interval;
> ssize_t ret = device_store_int(s, attr, buf, size);
> +
> + if (check_interval == old_check_interval)
> + return ret;
<---- newline here.
> + mutex_lock(&mce_sysfs_mutex);
> mce_restart();
> + mutex_unlock(&mce_sysfs_mutex);
<---- newline here.
> return ret;
> }
>
> --
> 2.16.2
Thx.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
Hi, Borislav.
Thank you for your good advice.
According to your advice, I will make and send PATCH v3.
Best regards.
Seunghun.
2018-03-02 21:14 GMT+09:00 Borislav Petkov <[email protected]>:
> On Thu, Mar 01, 2018 at 05:31:31AM +0900, Seunghun Han wrote:
>> Changes since v1: add mce_sysfs_mutex according to review result.
>
> Ok, we're getting there.
>
>> arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++++++++-
>> 1 file changed, 11 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
>> index 706584681a4c..bee0795a3b8c 100644
>> --- a/arch/x86/kernel/cpu/mcheck/mce.c
>> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
>> @@ -55,6 +55,7 @@
>> #include "mce-internal.h"
>>
>> static DEFINE_MUTEX(mce_log_mutex);
>> +static DEFINE_MUTEX(mce_sysfs_mutex);
>>
>> #define CREATE_TRACE_POINTS
>> #include <trace/events/mce.h>
>> @@ -2045,8 +2046,11 @@ static void mce_enable_ce(void *all)
>> return;
>> cmci_reenable();
>> cmci_recheck();
>> - if (all)
>> + if (all) {
>> + mutex_lock(&mce_sysfs_mutex);
>> __mcheck_cpu_init_timer();
>> + mutex_unlock(&mce_sysfs_mutex);
>> + }
>
> Instead of chasing all the places which need to grab the mutex, I was
> thinking of grabbing it in all those functions which modify MCA behavior
> non-trivially from sysfs. And those are:
>
> store_int_with_restart()
> set_ignore_ce()
> set_cmci_disabled()
>
> And you've done that already for store_int_with_restart() and I think
> doing the same in the other two will address the issue nicely and
> in the simplest way possible.
>
> All those sysfs operations should be synchronized anyway. Please reflect
> that in your commit message too.
>
> The aspect about DOS is not really important because you need to be root
> to write those so if you have root, there are gazillion ways to shoot
> yourself in the foot already.
>
>> static struct bus_type mce_subsys = {
>> @@ -2132,8 +2136,14 @@ static ssize_t store_int_with_restart(struct device *s,
>> struct device_attribute *attr,
>> const char *buf, size_t size)
>> {
>> + unsigned long old_check_interval = check_interval;
>> ssize_t ret = device_store_int(s, attr, buf, size);
>> +
>> + if (check_interval == old_check_interval)
>> + return ret;
>
> <---- newline here.
>
>> + mutex_lock(&mce_sysfs_mutex);
>> mce_restart();
>> + mutex_unlock(&mce_sysfs_mutex);
>
> <---- newline here.
>
>> return ret;
>> }
>>
>> --
>> 2.16.2
>
> Thx.
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.