Subject: [PATCH] x86: Convert mce timer to hrtimer

From: Thomas Gleixner <[email protected]>

mce_timer is started in atomic contexts of cpu bringup. This results
in might_sleep() warnings on RT. Convert mce_timer to a hrtimer to
avoid this.

Cc: Tony Luck <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: [email protected] (open list:X86 MCE INFRASTRUCTURE)
Signed-off-by: Thomas Gleixner <[email protected]>
fold in:
|From: Mike Galbraith <[email protected]>
|Date: Wed, 29 May 2013 13:52:13 +0200
|Subject: [PATCH] x86/mce: fix mce timer interval
|
|Seems the mce timer fires at the wrong frequency in -rt kernels since roughly
|forever due to 32 bit overflow. 3.8-rt is also missing a multiplier.
|
|Add missing us -> ns conversion and 32 bit overflow prevention.
|
|Signed-off-by: Mike Galbraith <[email protected]>
|[bigeasy: use ULL instead of u64 cast]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 52 +++++++++++++++++++--------------------
1 file changed, 26 insertions(+), 26 deletions(-)

--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -41,6 +41,7 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
+#include <linux/jiffies.h>
#include <linux/jump_label.h>

#include <asm/intel-family.h>
@@ -1358,7 +1359,7 @@ int memory_failure(unsigned long pfn, in
static unsigned long check_interval = INITIAL_CHECK_INTERVAL;

static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
-static DEFINE_PER_CPU(struct timer_list, mce_timer);
+static DEFINE_PER_CPU(struct hrtimer, mce_timer);

static unsigned long mce_adjust_timer_default(unsigned long interval)
{
@@ -1367,26 +1368,18 @@ static unsigned long mce_adjust_timer_de

static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;

-static void __start_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct hrtimer *t, unsigned long iv)
{
- unsigned long when = jiffies + interval;
- unsigned long flags;
-
- local_irq_save(flags);
-
- if (!timer_pending(t) || time_before(when, t->expires))
- mod_timer(t, round_jiffies(when));
-
- local_irq_restore(flags);
+ if (!iv)
+ return;
+ hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
+ 0, HRTIMER_MODE_REL_PINNED);
}

-static void mce_timer_fn(struct timer_list *t)
+static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
{
- struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
unsigned long iv;

- WARN_ON(cpu_t != t);
-
iv = __this_cpu_read(mce_next_interval);

if (mce_available(this_cpu_ptr(&cpu_info))) {
@@ -1409,7 +1402,11 @@ static void mce_timer_fn(struct timer_li

done:
__this_cpu_write(mce_next_interval, iv);
- __start_timer(t, iv);
+ if (!iv)
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(iv)));
+ return HRTIMER_RESTART;
}

/*
@@ -1417,7 +1414,7 @@ static void mce_timer_fn(struct timer_li
*/
void mce_timer_kick(unsigned long interval)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
+ struct hrtimer *t = this_cpu_ptr(&mce_timer);
unsigned long iv = __this_cpu_read(mce_next_interval);

__start_timer(t, interval);
@@ -1432,7 +1429,7 @@ static void mce_timer_delete_all(void)
int cpu;

for_each_online_cpu(cpu)
- del_timer_sync(&per_cpu(mce_timer, cpu));
+ hrtimer_cancel(&per_cpu(mce_timer, cpu));
}

/*
@@ -1761,7 +1758,7 @@ static void __mcheck_cpu_clear_vendor(st
}
}

-static void mce_start_timer(struct timer_list *t)
+static void mce_start_timer(struct hrtimer *t)
{
unsigned long iv = check_interval * HZ;

@@ -1774,16 +1771,19 @@ static void mce_start_timer(struct timer

static void __mcheck_cpu_setup_timer(void)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
+ struct hrtimer *t = this_cpu_ptr(&mce_timer);

- timer_setup(t, mce_timer_fn, TIMER_PINNED);
+ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ t->function = mce_timer_fn;
}

static void __mcheck_cpu_init_timer(void)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
+ struct hrtimer *t = this_cpu_ptr(&mce_timer);
+
+ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ t->function = mce_timer_fn;

- timer_setup(t, mce_timer_fn, TIMER_PINNED);
mce_start_timer(t);
}

@@ -2285,7 +2285,7 @@ static int mce_cpu_dead(unsigned int cpu

static int mce_cpu_online(unsigned int cpu)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
+ struct hrtimer *t = this_cpu_ptr(&mce_timer);
int ret;

mce_device_create(cpu);
@@ -2302,10 +2302,10 @@ static int mce_cpu_online(unsigned int c

static int mce_cpu_pre_down(unsigned int cpu)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
+ struct hrtimer *t = this_cpu_ptr(&mce_timer);

mce_disable_cpu();
- del_timer_sync(t);
+ hrtimer_cancel(t);
mce_threshold_remove_device(cpu);
mce_device_remove(cpu);
return 0;


Subject: [PATCH] x86: UV: raw_spinlock conversion

From: Mike Galbraith <[email protected]>

Shrug. Lots of hobbyists have a beast in their basement, right?

Cc: [email protected]
Signed-off-by: Mike Galbraith <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
arch/x86/include/asm/uv/uv_bau.h | 14 +++++++-------
arch/x86/platform/uv/tlb_uv.c | 26 +++++++++++++-------------
arch/x86/platform/uv/uv_time.c | 20 ++++++++++++--------
3 files changed, 32 insertions(+), 28 deletions(-)

--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -643,9 +643,9 @@ struct bau_control {
cycles_t send_message;
cycles_t period_end;
cycles_t period_time;
- spinlock_t uvhub_lock;
- spinlock_t queue_lock;
- spinlock_t disable_lock;
+ raw_spinlock_t uvhub_lock;
+ raw_spinlock_t queue_lock;
+ raw_spinlock_t disable_lock;
/* tunables */
int max_concurr;
int max_concurr_const;
@@ -847,15 +847,15 @@ static inline int atom_asr(short i, stru
* to be lowered below the current 'v'. atomic_add_unless can only stop
* on equal.
*/
-static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
+static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
{
- spin_lock(lock);
+ raw_spin_lock(lock);
if (atomic_read(v) >= u) {
- spin_unlock(lock);
+ raw_spin_unlock(lock);
return 0;
}
atomic_inc(v);
- spin_unlock(lock);
+ raw_spin_unlock(lock);
return 1;
}

--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -740,9 +740,9 @@ static void destination_plugged(struct b

quiesce_local_uvhub(hmaster);

- spin_lock(&hmaster->queue_lock);
+ raw_spin_lock(&hmaster->queue_lock);
reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
+ raw_spin_unlock(&hmaster->queue_lock);

end_uvhub_quiesce(hmaster);

@@ -762,9 +762,9 @@ static void destination_timeout(struct b

quiesce_local_uvhub(hmaster);

- spin_lock(&hmaster->queue_lock);
+ raw_spin_lock(&hmaster->queue_lock);
reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
+ raw_spin_unlock(&hmaster->queue_lock);

end_uvhub_quiesce(hmaster);

@@ -785,7 +785,7 @@ static void disable_for_period(struct ba
cycles_t tm1;

hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
+ raw_spin_lock(&hmaster->disable_lock);
if (!bcp->baudisabled) {
stat->s_bau_disabled++;
tm1 = get_cycles();
@@ -798,7 +798,7 @@ static void disable_for_period(struct ba
}
}
}
- spin_unlock(&hmaster->disable_lock);
+ raw_spin_unlock(&hmaster->disable_lock);
}

static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -861,7 +861,7 @@ static void record_send_stats(cycles_t t
*/
static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
{
- spinlock_t *lock = &hmaster->uvhub_lock;
+ raw_spinlock_t *lock = &hmaster->uvhub_lock;
atomic_t *v;

v = &hmaster->active_descriptor_count;
@@ -995,7 +995,7 @@ static int check_enable(struct bau_contr
struct bau_control *hmaster;

hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
+ raw_spin_lock(&hmaster->disable_lock);
if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
stat->s_bau_reenabled++;
for_each_present_cpu(tcpu) {
@@ -1007,10 +1007,10 @@ static int check_enable(struct bau_contr
tbcp->period_giveups = 0;
}
}
- spin_unlock(&hmaster->disable_lock);
+ raw_spin_unlock(&hmaster->disable_lock);
return 0;
}
- spin_unlock(&hmaster->disable_lock);
+ raw_spin_unlock(&hmaster->disable_lock);
return -1;
}

@@ -1941,9 +1941,9 @@ static void __init init_per_cpu_tunables
bcp->cong_reps = congested_reps;
bcp->disabled_period = sec_2_cycles(disabled_period);
bcp->giveup_limit = giveup_limit;
- spin_lock_init(&bcp->queue_lock);
- spin_lock_init(&bcp->uvhub_lock);
- spin_lock_init(&bcp->disable_lock);
+ raw_spin_lock_init(&bcp->queue_lock);
+ raw_spin_lock_init(&bcp->uvhub_lock);
+ raw_spin_lock_init(&bcp->disable_lock);
}
}

--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event

/* There is one of these allocated per node */
struct uv_rtc_timer_head {
- spinlock_t lock;
+ raw_spinlock_t lock;
/* next cpu waiting for timer, local node relative: */
int next_cpu;
/* number of cpus on this node: */
@@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers
uv_rtc_deallocate_timers();
return -ENOMEM;
}
- spin_lock_init(&head->lock);
+ raw_spin_lock_init(&head->lock);
head->ncpus = uv_blade_nr_possible_cpus(bid);
head->next_cpu = -1;
blade_info[bid] = head;
@@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64
unsigned long flags;
int next_cpu;

- spin_lock_irqsave(&head->lock, flags);
+ raw_spin_lock_irqsave(&head->lock, flags);

next_cpu = head->next_cpu;
*t = expires;
@@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64
if (uv_setup_intr(cpu, expires)) {
*t = ULLONG_MAX;
uv_rtc_find_next_timer(head, pnode);
- spin_unlock_irqrestore(&head->lock, flags);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
return -ETIME;
}
}

- spin_unlock_irqrestore(&head->lock, flags);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
return 0;
}

@@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, i
unsigned long flags;
int rc = 0;

- spin_lock_irqsave(&head->lock, flags);
+ raw_spin_lock_irqsave(&head->lock, flags);

if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
rc = 1;
@@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, i
uv_rtc_find_next_timer(head, pnode);
}

- spin_unlock_irqrestore(&head->lock, flags);
+ raw_spin_unlock_irqrestore(&head->lock, flags);

return rc;
}
@@ -299,13 +299,17 @@ static int uv_rtc_unset_timer(int cpu, i
static u64 uv_read_rtc(struct clocksource *cs)
{
unsigned long offset;
+ u64 cycles;

+ preempt_disable();
if (uv_get_min_hub_revision_id() == 1)
offset = 0;
else
offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;

- return (u64)uv_read_local_mmr(UVH_RTC | offset);
+ cycles = (u64)uv_read_local_mmr(UVH_RTC | offset);
+ preempt_enable();
+ return cycles;
}

/*

2018-05-06 10:29:29

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Fri, 4 May 2018, Sebastian Andrzej Siewior wrote:

> From: Mike Galbraith <[email protected]>
>
> Shrug. Lots of hobbyists have a beast in their basement, right?

This hardly qualifies as a proper changelog ...

> }
> @@ -299,13 +299,17 @@ static int uv_rtc_unset_timer(int cpu, i
> static u64 uv_read_rtc(struct clocksource *cs)
> {
> unsigned long offset;
> + u64 cycles;
>
> + preempt_disable();
> if (uv_get_min_hub_revision_id() == 1)
> offset = 0;
> else
> offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
>
> - return (u64)uv_read_local_mmr(UVH_RTC | offset);
> + cycles = (u64)uv_read_local_mmr(UVH_RTC | offset);
> + preempt_enable();
> + return cycles;

And how exactly is this hunk related?

Thanks,

tglx

2018-05-06 10:59:55

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Sun, 2018-05-06 at 12:26 +0200, Thomas Gleixner wrote:
> On Fri, 4 May 2018, Sebastian Andrzej Siewior wrote:
>
> > From: Mike Galbraith <[email protected]>
> >
> > Shrug. Lots of hobbyists have a beast in their basement, right?
>
> This hardly qualifies as a proper changelog ...

Hm, that wasn't intended to be a changelog.

This patch may not be current either, I haven't tested RT on a UV box
in quite some time.

-Mike


Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On 2018-05-06 12:59:19 [+0200], Mike Galbraith wrote:
> On Sun, 2018-05-06 at 12:26 +0200, Thomas Gleixner wrote:
> > On Fri, 4 May 2018, Sebastian Andrzej Siewior wrote:
> >
> > > From: Mike Galbraith <[email protected]>
> > >
> > > Shrug. Lots of hobbyists have a beast in their basement, right?
> >
> > This hardly qualifies as a proper changelog ...
>
> Hm, that wasn't intended to be a changelog.
>
> This patch may not be current either, I haven't tested RT on a UV box
> in quite some time.

That last hunk looks like something that would be required even for !RT.
Would you mind to check that patch and write a changelog? If it doesn't
work for RT there is no need to carry this in -RT.

> -Mike

Sebastian

2018-05-07 08:00:33

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Mon, 2018-05-07 at 09:39 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-05-06 12:59:19 [+0200], Mike Galbraith wrote:
> > On Sun, 2018-05-06 at 12:26 +0200, Thomas Gleixner wrote:
> > > On Fri, 4 May 2018, Sebastian Andrzej Siewior wrote:
> > >
> > > > From: Mike Galbraith <[email protected]>
> > > >
> > > > Shrug. Lots of hobbyists have a beast in their basement, right?
> > >
> > > This hardly qualifies as a proper changelog ...
> >
> > Hm, that wasn't intended to be a changelog.
> >
> > This patch may not be current either, I haven't tested RT on a UV box
> > in quite some time.
>
> That last hunk looks like something that would be required even for !RT.
> Would you mind to check that patch and write a changelog? If it doesn't
> work for RT there is no need to carry this in -RT.

Yeah, I'll try to reserve a box.

-Mike

Subject: Re: [PATCH] x86: Convert mce timer to hrtimer

On 2018-05-04 13:14:58 [+0200], To [email protected] wrote:
> From: Thomas Gleixner <[email protected]>
>
> mce_timer is started in atomic contexts of cpu bringup. This results
> in might_sleep() warnings on RT. Convert mce_timer to a hrtimer to
> avoid this.

After an additional review I got to say that this patch can be dropped
as it is no longer required.
Sorry to bother you with this.

Sebastian

2018-05-19 14:11:58

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Mon, 2018-05-07 at 09:39 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-05-06 12:59:19 [+0200], Mike Galbraith wrote:
> > On Sun, 2018-05-06 at 12:26 +0200, Thomas Gleixner wrote:
> > > On Fri, 4 May 2018, Sebastian Andrzej Siewior wrote:
> > >
> > > > From: Mike Galbraith <[email protected]>
> > > >
> > > > Shrug. Lots of hobbyists have a beast in their basement, right?
> > >
> > > This hardly qualifies as a proper changelog ...
> >
> > Hm, that wasn't intended to be a changelog.
> >
> > This patch may not be current either, I haven't tested RT on a UV box
> > in quite some time.
>
> That last hunk looks like something that would be required even for !RT.
> Would you mind to check that patch and write a changelog? If it doesn't
> work for RT there is no need to carry this in -RT.

None of that patch is needed for a UV3000, but the below is. It's
likely still valid for now ancient UV boxen, but the UV100 the patch
was originally written for (2011/2.6.33-rt) has apparently wandered off
to become a beer keg or something meanwhile, so I can't test.

UV: Fix uv_bau_init() check_preemption_disabled() gripeage

[ 2.851947] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
[ 2.851951] caller is uv_bau_init+0x28/0xb62
[ 2.851954] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.17.0-rc5-0.g3e3e37b-rt_debug
[ 2.851956] Hardware name: SGI UV3000/UV3000, BIOS SGI UV 3000 series BIOS 01/15/2015
[ 2.851957] Call Trace:
[ 2.851964] dump_stack+0x85/0xcb
[ 2.851969] check_preemption_disabled+0x10c/0x120
[ 2.851972] ? init_per_cpu+0x88c/0x88c
[ 2.851974] uv_bau_init+0x28/0xb62
[ 2.851979] ? lapic_cal_handler+0xbb/0xbb
[ 2.851982] ? rt_mutex_unlock+0x35/0x50
[ 2.851985] ? init_per_cpu+0x88c/0x88c
[ 2.851988] ? set_debug_rodata+0x11/0x11
[ 2.851991] do_one_initcall+0x46/0x249
[ 2.851995] kernel_init_freeable+0x207/0x29c
[ 2.851999] ? rest_init+0xd0/0xd0
[ 2.852000] kernel_init+0xa/0x110
[ 2.852000] ret_from_fork+0x3a/0x50

(gdb) list *uv_bau_init+0x28
0xffffffff824a4d96 is in uv_bau_init (./arch/x86/include/asm/uv/uv_hub.h:212).
207 return (struct uv_hub_info_s *)__uv_hub_info_list[node];
208 }
209
210 static inline struct uv_hub_info_s *_uv_hub_info(void)
211 {
212 return (struct uv_hub_info_s *)uv_cpu_info->p_uv_hub_info;
213 }
214 #define uv_hub_info _uv_hub_info()
215
216 static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
(gdb)

arch/x86/include/asm/uv/uv_hub.h:
197 #define uv_cpu_info this_cpu_ptr(&__uv_cpu_info)

This and other substitutions make uv_bau_init() annoying for a PREEMPT
kernel, but PREEMPT_RT can silence the lot with one migrate_disable().

Signed-off-by: Mike Galbraith <[email protected]>
---
arch/x86/platform/uv/tlb_uv.c | 5 +++++
1 file changed, 5 insertions(+)

--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2213,6 +2213,8 @@ static int __init uv_bau_init(void)
if (!is_uv_system())
return 0;

+ migrate_disable();
+
if (is_uv4_hub())
ops = uv4_bau_ops;
else if (is_uv3_hub())
@@ -2269,6 +2271,8 @@ static int __init uv_bau_init(void)
}
}

+ migrate_enable();
+
return 0;

err_bau_disable:
@@ -2276,6 +2280,7 @@ static int __init uv_bau_init(void)
for_each_possible_cpu(cur_cpu)
free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));

+ migrate_enable();
set_bau_off();
nobau_perm = 1;


Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On 2018-05-19 16:09:56 [+0200], Mike Galbraith wrote:
> None of that patch is needed for a UV3000, but the below is. It's
> likely still valid for now ancient UV boxen, but the UV100 the patch
> was originally written for (2011/2.6.33-rt) has apparently wandered off
> to become a beer keg or something meanwhile, so I can't test.

so the old patch can go. Noted.
Regarding the preempt_disable() in the original patch in uv_read_rtc():
This looks essential for PREEMPT configs. Is it possible to get this
tested by someone or else get rid of the UV code? It looks broken for
"uv_get_min_hub_revision_id() != 1".

> UV: Fix uv_bau_init() check_preemption_disabled() gripeage
>
> [ 2.851947] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
> [ 2.851951] caller is uv_bau_init+0x28/0xb62
> [ 2.851954] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.17.0-rc5-0.g3e3e37b-rt_debug
> [ 2.851956] Hardware name: SGI UV3000/UV3000, BIOS SGI UV 3000 series BIOS 01/15/2015
> [ 2.851957] Call Trace:
> [ 2.851964] dump_stack+0x85/0xcb
> [ 2.851969] check_preemption_disabled+0x10c/0x120
> [ 2.851972] ? init_per_cpu+0x88c/0x88c
> [ 2.851974] uv_bau_init+0x28/0xb62
> [ 2.851979] ? lapic_cal_handler+0xbb/0xbb
> [ 2.851982] ? rt_mutex_unlock+0x35/0x50
> [ 2.851985] ? init_per_cpu+0x88c/0x88c
> [ 2.851988] ? set_debug_rodata+0x11/0x11
> [ 2.851991] do_one_initcall+0x46/0x249
> [ 2.851995] kernel_init_freeable+0x207/0x29c
> [ 2.851999] ? rest_init+0xd0/0xd0
> [ 2.852000] kernel_init+0xa/0x110
> [ 2.852000] ret_from_fork+0x3a/0x50
>
> (gdb) list *uv_bau_init+0x28
> 0xffffffff824a4d96 is in uv_bau_init (./arch/x86/include/asm/uv/uv_hub.h:212).
> 207 return (struct uv_hub_info_s *)__uv_hub_info_list[node];
> 208 }
> 209
> 210 static inline struct uv_hub_info_s *_uv_hub_info(void)
> 211 {
> 212 return (struct uv_hub_info_s *)uv_cpu_info->p_uv_hub_info;
> 213 }
> 214 #define uv_hub_info _uv_hub_info()
> 215
> 216 static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
> (gdb)
>
> arch/x86/include/asm/uv/uv_hub.h:
> 197 #define uv_cpu_info this_cpu_ptr(&__uv_cpu_info)
>
> This and other substitutions make uv_bau_init() annoying for a PREEMPT
> kernel, but PREEMPT_RT can silence the lot with one migrate_disable().

Why does PREEMPT_RT require migrate_disable() but PREEMPT only is fine
as-is? This does not look right.

> Signed-off-by: Mike Galbraith <[email protected]>
> ---
> arch/x86/platform/uv/tlb_uv.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> --- a/arch/x86/platform/uv/tlb_uv.c
> +++ b/arch/x86/platform/uv/tlb_uv.c
> @@ -2213,6 +2213,8 @@ static int __init uv_bau_init(void)
> if (!is_uv_system())
> return 0;
>
> + migrate_disable();
> +
> if (is_uv4_hub())
> ops = uv4_bau_ops;
> else if (is_uv3_hub())
> @@ -2269,6 +2271,8 @@ static int __init uv_bau_init(void)
> }
> }
>
> + migrate_enable();
> +
> return 0;
>
> err_bau_disable:
> @@ -2276,6 +2280,7 @@ static int __init uv_bau_init(void)
> for_each_possible_cpu(cur_cpu)
> free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
>
> + migrate_enable();
> set_bau_off();
> nobau_perm = 1;
>

Sebastian

2018-05-22 08:25:10

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Tue, 2018-05-22 at 08:50 +0200, Sebastian Andrzej Siewior wrote:
>
> Regarding the preempt_disable() in the original patch in uv_read_rtc():
> This looks essential for PREEMPT configs. Is it possible to get this
> tested by someone or else get rid of the UV code? It looks broken for
> "uv_get_min_hub_revision_id() != 1".

I suspect SGI cares not one whit about PREEMPT.

> Why does PREEMPT_RT require migrate_disable() but PREEMPT only is fine
> as-is? This does not look right.

UV is not ok with a PREEMPT config, it's just that for RT it's dirt
simple to shut it up, whereas for PREEMPT, preempt_disable() across
uv_bau_init() doesn't cut it due to allocations, and whatever else I
would have met before ending the whack-a-mole game.

If I were in your shoes, I think I'd just stop caring about UV until a
real user appears. AFAIK, I'm the only guy who ever ran RT on UV, and
I only did so because SUSE asked me to look into it.. years ago now.

-Mike

Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On 2018-05-22 10:24:22 [+0200], Mike Galbraith wrote:
> On Tue, 2018-05-22 at 08:50 +0200, Sebastian Andrzej Siewior wrote:
> >
> > Regarding the preempt_disable() in the original patch in uv_read_rtc():
> > This looks essential for PREEMPT configs. Is it possible to get this
> > tested by someone or else get rid of the UV code? It looks broken for
> > "uv_get_min_hub_revision_id() != 1".
>
> I suspect SGI cares not one whit about PREEMPT.

so it is broken then. I leave it to the x86 maintainers but at the very
least it should depend on !PREEMPT (if not server).

> > Why does PREEMPT_RT require migrate_disable() but PREEMPT only is fine
> > as-is? This does not look right.
>
> UV is not ok with a PREEMPT config, it's just that for RT it's dirt
> simple to shut it up, whereas for PREEMPT, preempt_disable() across
> uv_bau_init() doesn't cut it due to allocations, and whatever else I
> would have met before ending the whack-a-mole game.
>
> If I were in your shoes, I think I'd just stop caring about UV until a
> real user appears. AFAIK, I'm the only guy who ever ran RT on UV, and
> I only did so because SUSE asked me to look into it.. years ago now.

Okay. The problem I have with this patch is that it remains RT only
while the problem it addresses is not RT-only and PREEMPT kernels are
very much affected.
The thing is that *you* are my only UV user :) If you suggest that I
should stop caring about UV then I do so. Please post a patch that adds
a dependency to UV on PREEMPT so that part of the architecture is
documented.

> -Mike

Sebastian

2018-05-22 09:48:03

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Tue, 2018-05-22 at 11:14 +0200, Sebastian Andrzej Siewior wrote:
> On 2018-05-22 10:24:22 [+0200], Mike Galbraith wrote:
>
> > If I were in your shoes, I think I'd just stop caring about UV until a
> > real user appears. AFAIK, I'm the only guy who ever ran RT on UV, and
> > I only did so because SUSE asked me to look into it.. years ago now.
>
> Okay. The problem I have with this patch is that it remains RT only
> while the problem it addresses is not RT-only and PREEMPT kernels are
> very much affected.

Ah, but when RT gets merged (someday... maybe), that patch will apply,
and instantly make all.. zero.. UV-RT users happy campers :)

> The thing is that *you* are my only UV user :)

Crash-test-dummies don't really qualify as users :)

> If you suggest that I
> should stop caring about UV than I do so. Please post a patch that adds
> a dependency to UV on PREEMPT so that part of the architecture is
> documented.

Will do.

-Mike

2018-05-22 12:49:39

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] x86: UV: raw_spinlock conversion

On Tue, 2018-05-22 at 11:46 +0200, Mike Galbraith wrote:
> On Tue, 2018-05-22 at 11:14 +0200, Sebastian Andrzej Siewior wrote:
>
> > If you suggest that I
> > should stop caring about UV than I do so. Please post a patch that adds
> > a dependency to UV on PREEMPT so that part of the architecture is
> > documented.
>
> Will do.

On second thought, no I won't. It's either already known, or it should
be, making any such submission smell funny.

-Mike