From: Prakruthi Deepak Heragu <[email protected]>
During hotplug, the stolen time data structure is unmapped and the per-CPU
region that points to it is then cleared with memset(). The timer IRQ can
fire after the unmap has started but before the memset(), and its handler
updates stolen time accounting by reading through the still-set pointer
to the mapping being torn down. This causes the crash below in the timer
handler:
[ 3457.473139][ C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
...
[ 3458.154398][ C5] Call trace:
[ 3458.157648][ C5] para_steal_clock+0x30/0x50
[ 3458.162319][ C5] irqtime_account_process_tick+0x30/0x194
[ 3458.168148][ C5] account_process_tick+0x3c/0x280
[ 3458.173274][ C5] update_process_times+0x5c/0xf4
[ 3458.178311][ C5] tick_sched_timer+0x180/0x384
[ 3458.183164][ C5] __run_hrtimer+0x160/0x57c
[ 3458.187744][ C5] hrtimer_interrupt+0x258/0x684
[ 3458.192698][ C5] arch_timer_handler_virt+0x5c/0xa0
[ 3458.198002][ C5] handle_percpu_devid_irq+0xdc/0x414
[ 3458.203385][ C5] handle_domain_irq+0xa8/0x168
[ 3458.208241][ C5] gic_handle_irq.34493+0x54/0x244
[ 3458.213359][ C5] call_on_irq_stack+0x40/0x70
[ 3458.218125][ C5] do_interrupt_handler+0x60/0x9c
[ 3458.223156][ C5] el1_interrupt+0x34/0x64
[ 3458.227560][ C5] el1h_64_irq_handler+0x1c/0x2c
[ 3458.232503][ C5] el1h_64_irq+0x7c/0x80
[ 3458.236736][ C5] free_vmap_area_noflush+0x108/0x39c
[ 3458.242126][ C5] remove_vm_area+0xbc/0x118
[ 3458.246714][ C5] vm_remove_mappings+0x48/0x2a4
[ 3458.251656][ C5] __vunmap+0x154/0x278
[ 3458.255796][ C5] stolen_time_cpu_down_prepare+0xc0/0xd8
[ 3458.261542][ C5] cpuhp_invoke_callback+0x248/0xc34
[ 3458.266842][ C5] cpuhp_thread_fun+0x1c4/0x248
[ 3458.271696][ C5] smpboot_thread_fn+0x1b0/0x400
[ 3458.276638][ C5] kthread+0x17c/0x1e0
[ 3458.280691][ C5] ret_from_fork+0x10/0x20
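As a fix, use RCU to guard the stolen time structure: readers dereference
the pointer under rcu_read_lock(), and the hotplug path clears the pointer
and waits for a grace period with synchronize_rcu() before unmapping.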
Suggested-by: Will Deacon <[email protected]>
Signed-off-by: Prakruthi Deepak Heragu <[email protected]>
Signed-off-by: Elliot Berman <[email protected]>
---
Changes since v2: https://lore.kernel.org/all/[email protected]/
- Clean up sparse warnings and apply suggestion from Will
Changes since v1: https://lore.kernel.org/all/[email protected]/
- Use RCU instead of disabling interrupts
arch/arm64/kernel/paravirt.c | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
struct pv_time_stolen_time_region {
- struct pvclock_vcpu_stolen_time *kaddr;
+ struct pvclock_vcpu_stolen_time __rcu *kaddr;
};
static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
/* return stolen time in ns by asking the hypervisor */
static u64 para_steal_clock(int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
+ u64 ret = 0;
reg = per_cpu_ptr(&stolen_time_region, cpu);
@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
* online notification callback runs. Until the callback
* has run we just return zero.
*/
- if (!reg->kaddr)
+ rcu_read_lock();
+ kaddr = rcu_dereference(reg->kaddr);
+ if (!kaddr) {
+ rcu_read_unlock();
return 0;
+ }
- return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+ ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+ rcu_read_unlock();
+ return ret;
}
static int stolen_time_cpu_down_prepare(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
reg = this_cpu_ptr(&stolen_time_region);
if (!reg->kaddr)
return 0;
- memunmap(reg->kaddr);
- memset(reg, 0, sizeof(*reg));
+ kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+ synchronize_rcu();
+ memunmap(kaddr);
return 0;
}
static int stolen_time_cpu_online(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
struct arm_smccc_res res;
@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
return -EINVAL;
- reg->kaddr = memremap(res.a0,
+ kaddr = memremap(res.a0,
sizeof(struct pvclock_vcpu_stolen_time),
MEMREMAP_WB);
+ rcu_assign_pointer(reg->kaddr, kaddr);
+
if (!reg->kaddr) {
pr_warn("Failed to map stolen time data structure\n");
return -ENOMEM;
}
- if (le32_to_cpu(reg->kaddr->revision) != 0 ||
- le32_to_cpu(reg->kaddr->attributes) != 0) {
+ if (le32_to_cpu(kaddr->revision) != 0 ||
+ le32_to_cpu(kaddr->attributes) != 0) {
pr_warn_once("Unexpected revision or attributes in stolen time data\n");
return -ENXIO;
}
--
2.25.1
On 5/13/22 10:46 AM, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <[email protected]>
>
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
>
> [ 3457.473139][ C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
> ...
> [ 3458.154398][ C5] Call trace:
> [ 3458.157648][ C5] para_steal_clock+0x30/0x50
> [ 3458.162319][ C5] irqtime_account_process_tick+0x30/0x194
> [ 3458.168148][ C5] account_process_tick+0x3c/0x280
> [ 3458.173274][ C5] update_process_times+0x5c/0xf4
> [ 3458.178311][ C5] tick_sched_timer+0x180/0x384
> [ 3458.183164][ C5] __run_hrtimer+0x160/0x57c
> [ 3458.187744][ C5] hrtimer_interrupt+0x258/0x684
> [ 3458.192698][ C5] arch_timer_handler_virt+0x5c/0xa0
> [ 3458.198002][ C5] handle_percpu_devid_irq+0xdc/0x414
> [ 3458.203385][ C5] handle_domain_irq+0xa8/0x168
> [ 3458.208241][ C5] gic_handle_irq.34493+0x54/0x244
> [ 3458.213359][ C5] call_on_irq_stack+0x40/0x70
> [ 3458.218125][ C5] do_interrupt_handler+0x60/0x9c
> [ 3458.223156][ C5] el1_interrupt+0x34/0x64
> [ 3458.227560][ C5] el1h_64_irq_handler+0x1c/0x2c
> [ 3458.232503][ C5] el1h_64_irq+0x7c/0x80
> [ 3458.236736][ C5] free_vmap_area_noflush+0x108/0x39c
> [ 3458.242126][ C5] remove_vm_area+0xbc/0x118
> [ 3458.246714][ C5] vm_remove_mappings+0x48/0x2a4
> [ 3458.251656][ C5] __vunmap+0x154/0x278
> [ 3458.255796][ C5] stolen_time_cpu_down_prepare+0xc0/0xd8
> [ 3458.261542][ C5] cpuhp_invoke_callback+0x248/0xc34
> [ 3458.266842][ C5] cpuhp_thread_fun+0x1c4/0x248
> [ 3458.271696][ C5] smpboot_thread_fn+0x1b0/0x400
> [ 3458.276638][ C5] kthread+0x17c/0x1e0
> [ 3458.280691][ C5] ret_from_fork+0x10/0x20
>
> As a fix, introduce rcu lock to update stolen time structure.
>
> Suggested-by: Will Deacon <[email protected]>
> Signed-off-by: Prakruthi Deepak Heragu <[email protected]>
> Signed-off-by: Elliot Berman <[email protected]>
> ---
Looks good to me, but one quick question though (see below).
Reviewed-by: Srivatsa S. Bhat (VMware) <[email protected]>
>
> static int stolen_time_cpu_down_prepare(unsigned int cpu)
> {
> + struct pvclock_vcpu_stolen_time *kaddr = NULL;
> struct pv_time_stolen_time_region *reg;
>
> reg = this_cpu_ptr(&stolen_time_region);
> if (!reg->kaddr)
> return 0;
>
> - memunmap(reg->kaddr);
> - memset(reg, 0, sizeof(*reg));
> + kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
> + synchronize_rcu();
> + memunmap(kaddr);
>
The original code used to memset the stolen time region, but this
patch seems to drop it. Was that change intentional?
Regards,
Srivatsa
On Fri, May 13, 2022 at 04:32:53PM -0700, Srivatsa S. Bhat wrote:
> On 5/13/22 10:46 AM, Elliot Berman wrote:
> > From: Prakruthi Deepak Heragu <[email protected]>
> >
> > During hotplug, the stolen time data structure is unmapped and memset.
> > There is a possibility of the timer IRQ being triggered before memset
> > and stolen time is getting updated as part of this timer IRQ handler. This
> > causes the below crash in timer handler -
> >
> > [ 3457.473139][ C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
> > ...
> > [ 3458.154398][ C5] Call trace:
> > [ 3458.157648][ C5] para_steal_clock+0x30/0x50
> > [ 3458.162319][ C5] irqtime_account_process_tick+0x30/0x194
> > [ 3458.168148][ C5] account_process_tick+0x3c/0x280
> > [ 3458.173274][ C5] update_process_times+0x5c/0xf4
> > [ 3458.178311][ C5] tick_sched_timer+0x180/0x384
> > [ 3458.183164][ C5] __run_hrtimer+0x160/0x57c
> > [ 3458.187744][ C5] hrtimer_interrupt+0x258/0x684
> > [ 3458.192698][ C5] arch_timer_handler_virt+0x5c/0xa0
> > [ 3458.198002][ C5] handle_percpu_devid_irq+0xdc/0x414
> > [ 3458.203385][ C5] handle_domain_irq+0xa8/0x168
> > [ 3458.208241][ C5] gic_handle_irq.34493+0x54/0x244
> > [ 3458.213359][ C5] call_on_irq_stack+0x40/0x70
> > [ 3458.218125][ C5] do_interrupt_handler+0x60/0x9c
> > [ 3458.223156][ C5] el1_interrupt+0x34/0x64
> > [ 3458.227560][ C5] el1h_64_irq_handler+0x1c/0x2c
> > [ 3458.232503][ C5] el1h_64_irq+0x7c/0x80
> > [ 3458.236736][ C5] free_vmap_area_noflush+0x108/0x39c
> > [ 3458.242126][ C5] remove_vm_area+0xbc/0x118
> > [ 3458.246714][ C5] vm_remove_mappings+0x48/0x2a4
> > [ 3458.251656][ C5] __vunmap+0x154/0x278
> > [ 3458.255796][ C5] stolen_time_cpu_down_prepare+0xc0/0xd8
> > [ 3458.261542][ C5] cpuhp_invoke_callback+0x248/0xc34
> > [ 3458.266842][ C5] cpuhp_thread_fun+0x1c4/0x248
> > [ 3458.271696][ C5] smpboot_thread_fn+0x1b0/0x400
> > [ 3458.276638][ C5] kthread+0x17c/0x1e0
> > [ 3458.280691][ C5] ret_from_fork+0x10/0x20
> >
> > As a fix, introduce rcu lock to update stolen time structure.
> >
> > Suggested-by: Will Deacon <[email protected]>
> > Signed-off-by: Prakruthi Deepak Heragu <[email protected]>
> > Signed-off-by: Elliot Berman <[email protected]>
> > ---
>
> Looks good to me, but one quick question though (see below).
>
> Reviewed-by: Srivatsa S. Bhat (VMware) <[email protected]>
Cheers.
> > static int stolen_time_cpu_down_prepare(unsigned int cpu)
> > {
> > + struct pvclock_vcpu_stolen_time *kaddr = NULL;
> > struct pv_time_stolen_time_region *reg;
> >
> > reg = this_cpu_ptr(&stolen_time_region);
> > if (!reg->kaddr)
> > return 0;
> >
> > - memunmap(reg->kaddr);
> > - memset(reg, 0, sizeof(*reg));
> > + kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
> > + synchronize_rcu();
> > + memunmap(kaddr);
> >
>
> The original code used to memset the stolen time region, but this
> patch seems to drop it. Was that change intentional?
'struct pv_time_stolen_time_region' only has one field ('kaddr'), which
we're now clearing with rcu_replace_pointer() so the memset doesn't make
sense.
Will
On Fri, 13 May 2022 10:46:54 -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <[email protected]>
>
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
>
> [...]
Applied to arm64 (for-next/fixes), thanks!
[1/1] arm64: paravirt: Use RCU read locks to guard stolen_time
https://git.kernel.org/arm64/c/19bef63f951e
Cheers,
--
Will
https://fixes.arm64.dev
https://next.arm64.dev
https://will.arm64.dev