2021-01-03 23:17:25

by Johnathan Smithinovic

[permalink] [raw]
Subject: PROBLEM: CPU hotplug leads to NULL pointer dereference with RAPL enabled on AMD 2990WX

CPU hotplug leads to NULL pointer dereference with RAPL enabled on AMD 2990WX


When hot-plugging CPUs (e.g. manually or on suspend) I get a NULL
pointer dereference in rapl_cpu_offline() for CPUs 16 and 24.
It *seems* to me that this has to do with commit
700d098acec5271161606f3c0086b71695ea2ef8
("x86/CPU/AMD: Save AMD NodeId as cpu_die_id").
When reverting said commit hotplug works again.


I have been able to reproduce this on 5.10.4 and 5.11-rc1; 5.4.86 appears
not to be affected because RAPL does not seem to get enabled for the CPU
in use.
(CPUs 16 and 24 are not siblings according to
/sys/devices/system/cpu/cpu16/topology/thread_siblings_list in case that is
relevant.)


Steps to reproduce (on my system (ubuntu 18.04), as root):
echo 0 > /sys/devices/system/cpu/cpu16/hotplug/target
(When turning off RAPL by unsetting "PERF_EVENTS_INTEL_RAPL" I seem to be able
to hot-plug the mentioned CPUs without reverting 700d098acec5.)

(In order to be able to test this with 5.11-rc1 on my system I had to revert
commit f36a74b9345aebaf5d325380df87a54720229d18.)


Relevant part of dmesg when trying to turn off CPU16:

[ 46.828426] BUG: kernel NULL pointer dereference, address: 0000000000000008
[ 46.828467] #PF: supervisor write access in kernel mode
[ 46.828493] #PF: error_code(0x0002) - not-present page
[ 46.828518] PGD 0 P4D 0
[ 46.828534] Oops: 0002 [#1] SMP NOPTI
[ 46.828554] CPU: 16 PID: 107 Comm: cpuhp/16 Not tainted 5.11.0-rc1-bisect #1
[ 46.828589] Hardware name: System manufacturer System Product Name/ROG ZENITH EXTREME, BIOS 1701 01/09/2019
[ 46.828632] RIP: 0010:rapl_cpu_offline+0x51/0xb0 [rapl]
[ 46.828663] Code: 0d 84 2c 00 00 45 31 e4 3b 91 28 01 00 00 73 08 4c 8b a4 d1 30 01 00 00 f0 48 0f b3 05 68 28 00 00 72 07 5b 31 c0 41 5c 5d c3 <41> c7 44 24 08 ff ff ff ff 48 8b 04 c5 00 89 28 9d 89 fb 48 c7 c2
[ 46.828746] RSP: 0018:ffffaaba8397be48 EFLAGS: 00010247
[ 46.828773] RAX: 0000000000000010 RBX: ffff9963ee018760 RCX: ffff9960108cbc00
[ 46.828807] RDX: 0000000000000002 RSI: 000000000000009e RDI: 0000000000000010
[ 46.828839] RBP: ffffaaba8397be58 R08: ffff9963ee018788 R09: 0000000000000000
[ 46.828872] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 46.828905] R13: 0000000000000010 R14: ffff9963ee018788 R15: ffff9960020ec8c0
[ 46.828938] FS: 0000000000000000(0000) GS:ffff9963ee000000(0000) knlGS:0000000000000000
[ 46.828975] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 46.829003] CR2: 0000000000000008 CR3: 000000076b810000 CR4: 00000000003506e0
[ 46.829036] Call Trace:
[ 46.829052] ? rapl_hrtimer_handle+0xa0/0xa0 [rapl]
[ 46.829078] cpuhp_invoke_callback+0x85/0x410
[ 46.829105] ? sort_range+0x30/0x30
[ 46.829125] cpuhp_thread_fun+0xb8/0x120
[ 46.829145] smpboot_thread_fn+0xfc/0x170
[ 46.829166] kthread+0x126/0x140
[ 46.829185] ? kthread_park+0x90/0x90
[ 46.829206] ret_from_fork+0x22/0x30
[ 46.829229] Modules linked in: xt_CHECKSUM iptable_mangle xt_MASQUERADE
bridge stp llc iptable_filter xt_nat xt_tcpudp iptable_nat nf_nat nf_conntrack
nf_defrag_ipv6 nf_defrag_ipv4 bpfilter binfmt_misc snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_hda_intel
snd_intel_dspcfg nls_iso8859_1 soundwire_intel soundwire_generic_allocation
soundwire_cadence joydev input_leds snd_hda_codec snd_hda_core snd_hwdep
intel_rapl_msr soundwire_bus intel_rapl_common snd_soc_core snd_compress
amd64_edac_mod ac97_bus snd_pcm_dmaengine edac_mce_amd snd_pcm kvm_amd
snd_seq_midi kvm snd_seq_midi_event rapl snd_rawmidi eeepc_wmi asus_wmi
sparse_keymap snd_seq video wmi_bmof snd_seq_device snd_timer snd efi_pstore
soundcore k10temp ccp mac_hid sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core
iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi parport_pc ppdev lp parport
ip_tables x_tables autofs4 btrfs blake2b_generic raid10 raid456
async_raid6_recov async_memcpy async_pq
[ 46.829309] async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath
linear hid_cherry hid_generic usbhid hid mxm_wmi crct10dif_pclmul crc32_pclmul
ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper nvme igb
nvme_core dca ahci i2c_piix4 xhci_pci i2c_algo_bit libahci xhci_pci_renesas
gpio_amdpt wmi gpio_generic
[ 46.829830] CR2: 0000000000000008
[ 46.829848] ---[ end trace 5c9c68d47cf771f6 ]---
[ 46.881781] RIP: 0010:rapl_cpu_offline+0x51/0xb0 [rapl]
[ 46.881808] Code: 0d 84 2c 00 00 45 31 e4 3b 91 28 01 00 00 73 08 4c 8b a4 d1 30 01 00 00 f0 48 0f b3 05 68 28 00 00 72 07 5b 31 c0 41 5c 5d c3 <41> c7 44 24 08 ff ff ff ff 48 8b 04 c5 00 89 28 9d 89 fb 48 c7 c2
[ 46.881885] RSP: 0018:ffffaaba8397be48 EFLAGS: 00010247
[ 46.882413] RAX: 0000000000000010 RBX: ffff9963ee018760 RCX: ffff9960108cbc00
[ 46.886575] RDX: 0000000000000002 RSI: 000000000000009e RDI: 0000000000000010
[ 46.891210] RBP: ffffaaba8397be58 R08: ffff9963ee018788 R09: 0000000000000000
[ 46.895849] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 46.900579] R13: 0000000000000010 R14: ffff9963ee018788 R15: ffff9960020ec8c0
[ 46.904623] FS: 0000000000000000(0000) GS:ffff9963ee000000(0000) knlGS:0000000000000000
[ 46.909198] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 46.913625] CR2: 0000000000000008 CR3: 00000004adf54000 CR4: 00000000003506e0




Kind regards
John



2021-01-04 09:52:34

by Borislav Petkov

[permalink] [raw]
Subject: Re: PROBLEM: CPU hotplug leads to NULL pointer dereference with RAPL enabled on AMD 2990WX

On Mon, Jan 04, 2021 at 12:02:44AM +0100, Johnathan Smithinovic wrote:
> CPU hotplug leads to NULL pointer dereference with RAPL enabled on AMD 2990WX
>
>
> When hot-plugging CPUs (e.g. manually or on suspend) I get a NULL
> pointer dereference in rapl_cpu_offline() for CPUs 16 and 24.
> It *seems* to me that this has to do with commit
> 700d098acec5271161606f3c0086b71695ea2ef8
> ("x86/CPU/AMD: Save AMD NodeId as cpu_die_id").
> When reverting said commit hotplug works again.

Yeah, known issue and I'm working on it.

Thx for reporting.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-01-04 10:54:32

by Borislav Petkov

[permalink] [raw]
Subject: Re: PROBLEM: CPU hotplug leads to NULL pointer dereference with RAPL enabled on AMD 2990WX

On Mon, Jan 04, 2021 at 10:50:10AM +0100, Borislav Petkov wrote:
> On Mon, Jan 04, 2021 at 12:02:44AM +0100, Johnathan Smithinovic wrote:
> > CPU hotplug leads to NULL pointer dereference with RAPL enabled on AMD 2990WX
> >
> >
> > When hot-plugging CPUs (e.g. manually or on suspend) I get a NULL
> > pointer dereference in rapl_cpu_offline() for CPUs 16 and 24.
> > It *seems* to me that this has to do with commit
> > 700d098acec5271161606f3c0086b71695ea2ef8
> > ("x86/CPU/AMD: Save AMD NodeId as cpu_die_id").
> > When reverting said commit hotplug works again.
>
> Yeah, known issue and I'm working on it.

I can't get my box to generate the topology config yours has, so can you
run the debug patch below on your system on the latest Linus tree, offline
cores (it should prevent the oops so that you can catch dmesg), and then
send me a full dmesg? Private mail's fine too.

Thx.

---
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 7dbbeaacd995..19563faa58ae 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -139,10 +139,13 @@ static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;

-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu, bool dbg)
{
unsigned int dieid = topology_logical_die_id(cpu);

+ if (dbg)
+ pr_info("%s: CPU%d, dieid: %d\n", __func__, cpu, dieid);
+
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
@@ -360,7 +363,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;

/* must be done before validate_group */
- pmu = cpu_to_rapl_pmu(event->cpu);
+ pmu = cpu_to_rapl_pmu(event->cpu, false);
if (!pmu)
return -EINVAL;
event->cpu = pmu->cpu;
@@ -543,13 +546,16 @@ static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {

static int rapl_cpu_offline(unsigned int cpu)
{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu, true);
int target;

/* Check if exiting cpu is used for collecting rapl events */
if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
return 0;

+ if (WARN_ON(!pmu))
+ return -1;
+
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
@@ -565,7 +571,7 @@ static int rapl_cpu_offline(unsigned int cpu)

static int rapl_cpu_online(unsigned int cpu)
{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu, true);
int target;

if (!pmu) {
@@ -682,6 +688,8 @@ static int __init init_rapl_pmus(void)
int maxdie = topology_max_packages() * topology_max_die_per_package();
size_t size;

+ pr_info("%s: maxdie: %d\n", __func__, maxdie);
+
size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
rapl_pmus = kzalloc(size, GFP_KERNEL);
if (!rapl_pmus)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8ca66af96a54..20343682aace 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -319,6 +319,11 @@ int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
for_each_possible_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);

+ pr_info("%s: init: %d, cpu %d, cur_cpu: %d, cpu_die_id: %d, die_id: %d, "
+ "phys_proc_id: %d, proc_id: %d, logical_die_id: %d\n",
+ __func__, c->initialized, cpu, cur_cpu, c->cpu_die_id, die_id,
+ c->phys_proc_id, proc_id, c->logical_die_id);
+
if (c->initialized && c->cpu_die_id == die_id &&
c->phys_proc_id == proc_id)
return c->logical_die_id;

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette