Hello!
A few random patches that permit POWER to pass kernbench on -rt.
Many of these have more focus on expediency than care for correctness,
so they might best be thought of as workarounds rather than as complete
solutions.
There are still issues not addressed by this patch, including:
o kmem_cache_alloc() from non-preemptible context during
bootup (xics_startup() building the irq_radix_revmap()).
o unmap_vmas() freeing pages with preemption disabled.
Might be able to address this by linking the pages together,
then freeing them en masse after preemption has been re-enabled,
but there is likely a better approach.
Thoughts?
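For the second item above, a minimal sketch of the link-then-free idea
(illustration only, not part of the patch below; the helper names are
made up):

#include <linux/list.h>
#include <linux/mm.h>

/*
 * Pages collected while preemption is disabled are strung onto a
 * local list through page->lru; nothing is freed until the caller
 * has done preempt_enable().
 */
static inline void defer_free_page(struct page *page, struct list_head *list)
{
        /* Caller has preemption disabled: just queue the page. */
        list_add(&page->lru, list);
}

static inline void free_deferred_pages(struct list_head *list)
{
        struct page *page, *next;

        /* Caller has re-enabled preemption: now it is safe to free. */
        list_for_each_entry_safe(page, next, list, lru) {
                list_del(&page->lru);
                put_page(page);
        }
}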
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/powerpc/kernel/prom.c | 2 +-
arch/powerpc/mm/fault.c | 3 +++
arch/powerpc/mm/tlb_64.c | 8 ++++++--
arch/powerpc/platforms/pseries/eeh.c | 2 +-
drivers/of/base.c | 2 +-
include/asm-powerpc/tlb.h | 5 ++++-
include/asm-powerpc/tlbflush.h | 5 ++++-
mm/memory.c | 2 ++
8 files changed, 22 insertions(+), 7 deletions(-)
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c
--- linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c 2007-10-28 13:37:23.000000000 -0700
@@ -80,7 +80,7 @@ struct boot_param_header *initial_boot_p
extern struct device_node *allnodes; /* temporary while merging */
-extern rwlock_t devtree_lock; /* temporary while merging */
+extern raw_rwlock_t devtree_lock; /* temporary while merging */
/* export that to outside world */
struct device_node *of_chosen;
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/mm/fault.c linux-2.6.23.1-rt4-fix/arch/powerpc/mm/fault.c
--- linux-2.6.23.1-rt4/arch/powerpc/mm/fault.c 2007-10-27 22:20:57.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/mm/fault.c 2007-10-28 13:49:07.000000000 -0700
@@ -301,6 +301,7 @@ good_area:
if (get_pteptr(mm, address, &ptep, &pmdp)) {
spinlock_t *ptl = pte_lockptr(mm, pmdp);
spin_lock(ptl);
+ preempt_disable();
if (pte_present(*ptep)) {
struct page *page = pte_page(*ptep);
@@ -310,10 +311,12 @@ good_area:
}
pte_update(ptep, 0, _PAGE_HWEXEC);
_tlbie(address);
+ preempt_enable();
pte_unmap_unlock(ptep, ptl);
up_read(&mm->mmap_sem);
return 0;
}
+ preempt_enable();
pte_unmap_unlock(ptep, ptl);
}
#endif
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c
--- linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c 2007-10-27 22:20:57.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c 2007-10-28 13:50:38.000000000 -0700
@@ -194,7 +194,9 @@ void hpte_need_flush(struct mm_struct *m
* batch
*/
if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
+ preempt_disable();
__flush_tlb_pending(batch);
+ preempt_enable();
i = 0;
}
if (i == 0) {
@@ -211,7 +213,9 @@ void hpte_need_flush(struct mm_struct *m
* always flush it on RT to reduce scheduling latency.
*/
if (machine_is(celleb)) {
+ preempt_disable();
__flush_tlb_pending(batch);
+ preempt_enable();
return;
}
#endif /* CONFIG_PREEMPT_RT */
@@ -292,7 +296,7 @@ void __flush_hash_table_range(struct mm_
* to being hashed). This is not the most performance oriented
* way to do things but is fine for our needs here.
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
arch_enter_lazy_mmu_mode();
for (; start < end; start += PAGE_SIZE) {
pte_t *ptep = find_linux_pte(mm->pgd, start);
@@ -306,7 +310,7 @@ void __flush_hash_table_range(struct mm_
hpte_need_flush(mm, start, ptep, pte, 0);
}
arch_leave_lazy_mmu_mode();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#endif /* CONFIG_HOTPLUG */
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c
--- linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c 2007-10-28 15:43:54.000000000 -0700
@@ -97,7 +97,7 @@ int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
/* Lock to avoid races due to multiple reports of an error */
-static DEFINE_SPINLOCK(confirm_error_lock);
+static DEFINE_RAW_SPINLOCK(confirm_error_lock);
/* Buffer for reporting slot-error-detail rtas calls. Its here
* in BSS, and not dynamically alloced, so that it ends up in
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/drivers/of/base.c linux-2.6.23.1-rt4-fix/drivers/of/base.c
--- linux-2.6.23.1-rt4/drivers/of/base.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/drivers/of/base.c 2007-10-28 13:38:36.000000000 -0700
@@ -25,7 +25,7 @@ struct device_node *allnodes;
/* use when traversing tree through the allnext, child, sibling,
* or parent members of struct device_node.
*/
-DEFINE_RWLOCK(devtree_lock);
+DEFINE_RAW_RWLOCK(devtree_lock);
int of_n_addr_cells(struct device_node *np)
{
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h
--- linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h 2007-10-28 11:36:47.000000000 -0700
@@ -118,8 +118,11 @@ static inline void arch_leave_lazy_mmu_m
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- if (batch->index)
+ if (batch->index) {
+ preempt_disable();
__flush_tlb_pending(batch);
+ preempt_enable();
+ }
batch->active = 0;
}
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h
--- linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h 2007-10-28 11:36:05.000000000 -0700
@@ -44,8 +44,11 @@ static inline void tlb_flush(struct mmu_
* pages are going to be freed and we really don't want to have a CPU
* access a freed page because it has a stale TLB
*/
- if (tlbbatch->index)
+ if (tlbbatch->index) {
+ preempt_disable();
__flush_tlb_pending(tlbbatch);
+ preempt_enable();
+ }
pte_free_finish();
}
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/mm/memory.c linux-2.6.23.1-rt4-fix/mm/memory.c
--- linux-2.6.23.1-rt4/mm/memory.c 2007-10-27 22:20:57.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/mm/memory.c 2007-10-28 15:40:36.000000000 -0700
@@ -664,6 +664,7 @@ static unsigned long zap_pte_range(struc
int anon_rss = 0;
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ preempt_disable();
arch_enter_lazy_mmu_mode();
do {
pte_t ptent = *pte;
@@ -732,6 +733,7 @@ static unsigned long zap_pte_range(struc
add_mm_rss(mm, file_rss, anon_rss);
arch_leave_lazy_mmu_mode();
+ preempt_enable();
pte_unmap_unlock(pte - 1, ptl);
return addr;
On Mon, 2007-10-29 at 11:50 -0700, Paul E. McKenney wrote:
> Hello!
>
> A few random patches that permit POWER to pass kernbench on -rt.
> Many of these have more focus on expediency than care for correctness,
> so they might best be thought of as workarounds rather than as complete
> solutions.
> There are still issues not addressed by this patch, including:
>
> o kmem_cache_alloc() from non-preemptible context during
> bootup (xics_startup() building the irq_radix_revmap()).
>
> o unmap_vmas() freeing pages with preemption disabled.
> Might be able to address this by linking the pages together,
> then freeing them en masse after preemption has been re-enabled,
> but there is likely a better approach.
>
> Thoughts?
I see a lot of cases where you add preempt_disable/enable around areas
that have the PTE lock held...
So in -rt, spin_lock doesn't disable preempt? I'm a bit worried...
there are some strong requirements that anything within that lock is not
preempted, so zap_pte_range() is the obvious one, but all of them would
need to be addressed.
Ben.
On Tue, Oct 30, 2007 at 07:07:48AM +1100, Benjamin Herrenschmidt wrote:
>
> On Mon, 2007-10-29 at 11:50 -0700, Paul E. McKenney wrote:
> > Hello!
> >
> > A few random patches that permit POWER to pass kernbench on -rt.
> > Many of these have more focus on expediency than care for correctness,
> > so they might best be thought of as workarounds rather than as complete
> > solutions.
> > There are still issues not addressed by this patch, including:
> >
> > o kmem_cache_alloc() from non-preemptible context during
> > bootup (xics_startup() building the irq_radix_revmap()).
> >
> > o unmap_vmas() freeing pages with preemption disabled.
> > Might be able to address this by linking the pages together,
> > then freeing them en masse after preemption has been re-enabled,
> > but there is likely a better approach.
> >
> > Thoughts?
>
> I see a lot of cases where you add preempt_disable/enable around areas
> that have the PTE lock held...
>
> So in -rt, spin_lock doesn't disable preempt? I'm a bit worried...
> there are some strong requirements that anything within that lock is not
> preempted, so zap_pte_range() is the obvious one, but all of them would
> need to be addressed.
Right in one! One of the big changes in -rt is that spinlock critical
sections (and RCU read-side critical sections, for that matter) are
preemptible under CONFIG_PREEMPT_RT.
And I agree that this patchset will have missed quite a few places where
additional changes are required. Hence the word "including" above, rather
than something like "specifically". ;-)
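For concreteness, here is a minimal sketch of what that means for per-CPU
state such as the TLB batch (the struct and function names below are
illustrative only): because a spin_lock() section can now be preempted and
the task migrated, the per-CPU variable has to be pinned explicitly with
get_cpu_var()/put_cpu_var() (or preempt_disable()/preempt_enable()).

#include <linux/percpu.h>

/* Illustrative per-CPU batch, standing in for ppc64_tlb_batch. */
struct demo_batch {
        int index;
};
static DEFINE_PER_CPU(struct demo_batch, demo_batch);

static void demo_flush(struct demo_batch *batch)
{
        batch->index = 0;       /* stand-in for __flush_tlb_pending() */
}

static void demo_finish_batch(void)
{
        /*
         * get_cpu_var() disables preemption, so the batch cannot be
         * pulled out from under us by a migration, even if the caller
         * holds an -rt spinlock (which no longer disables preemption
         * under CONFIG_PREEMPT_RT).
         */
        struct demo_batch *batch = &get_cpu_var(demo_batch);

        if (batch->index)
                demo_flush(batch);
        put_cpu_var(demo_batch);
}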
Thanx, Paul
On Mon, 2007-10-29 at 13:26 -0700, Paul E. McKenney wrote:
>
> > I see a lot of cases where you add preempt_disable/enable around areas
> > that have the PTE lock held...
> >
> > So in -rt, spin_lock doesn't disable preempt? I'm a bit worried...
> > there are some strong requirements that anything within that lock is not
> > preempted, so zap_pte_range() is the obvious one, but all of them would
> > need to be addressed.
>
> Right in one! One of the big changes in -rt is that spinlock critical
> sections (and RCU read-side critical sections, for that matter) are
> preemptible under CONFIG_PREEMPT_RT.
>
> And I agree that this patchset will have missed quite a few places where
> additional changes are required. Hence the word "including" above, rather
> than something like "specifically". ;-)
Ok, well, I'm pretty familiar with that MM code since I wrote a good
deal of the current version so I'll try to spend some time with your
patch and have a look. It may have to wait for next week though, but feel
free to ping me if you don't hear back, in case it falls through the
hole in my brain :-)
Ben.
On Tue, Oct 30, 2007 at 07:37:50AM +1100, Benjamin Herrenschmidt wrote:
>
> On Mon, 2007-10-29 at 13:26 -0700, Paul E. McKenney wrote:
> >
> > > I see a lot of cases where you add preempt_disable/enable around areas
> > > that have the PTE lock held...
> > >
> > > So in -rt, spin_lock doesn't disable preempt? I'm a bit worried...
> > > there are some strong requirements that anything within that lock is not
> > > preempted, so zap_pte_range() is the obvious one, but all of them would
> > > need to be addressed.
> >
> > Right in one! One of the big changes in -rt is that spinlock critical
> > sections (and RCU read-side critical sections, for that matter) are
> > preemptible under CONFIG_PREEMPT_RT.
> >
> > And I agree that this patchset will have missed quite a few places where
> > additional changes are required. Hence the word "including" above, rather
> > than something like "specifically". ;-)
>
> Ok, well, I'm pretty familiar with that MM code since I wrote a good
> deal of the current version so I'll try to spend some time with your
> patch and have a look. It may have to wait for next week though, but feel
> free to ping me if you don't hear back, in case it falls through the
> hole in my brain :-)
Works for me!!!
Thanx, Paul
On Tue, 2007-10-30 at 07:07 +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2007-10-29 at 11:50 -0700, Paul E. McKenney wrote:
> > Hello!
> >
> > A few random patches that permit POWER to pass kernbench on -rt.
> > Many of these have more focus on expediency than care for correctness,
> > so they might best be thought of as workarounds rather than as complete
> > solutions.
> > There are still issues not addressed by this patch, including:
> >
> > o kmem_cache_alloc() from non-preemptible context during
> > bootup (xics_startup() building the irq_radix_revmap()).
> >
> > o unmap_vmas() freeing pages with preemption disabled.
> > Might be able to address this by linking the pages together,
> > then freeing them en masse after preemption has been re-enabled,
> > but there is likely a better approach.
> >
> > Thoughts?
>
> I see a lot of cases where you add preempt_disable/enable around areas
> that have the PTE lock held...
>
> So in -rt, spin_lock doesn't disable preempt? I'm a bit worried...
So as Paul mentioned, spin_lock is now a mutex. There is a new
raw_spinlock however (simply change the way it is declared, calling
conventions are the same) which is used in a very few areas where a
traditional spin_lock is truly necessary. This may or may not be one of
those times, but I wanted to point it out.
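A minimal sketch of that distinction, with made-up lock names (on this -rt
tree the calling conventions really are identical, only the declaration
changes):

#include <linux/spinlock.h>

/*
 * Under CONFIG_PREEMPT_RT the first lock becomes a sleeping lock and
 * its critical section is preemptible; the raw variant keeps the
 * traditional spin-with-preemption-disabled behaviour.
 */
static DEFINE_SPINLOCK(demo_sleeping_lock);
static DEFINE_RAW_SPINLOCK(demo_raw_lock);

static int demo_count;

static void demo(void)
{
        spin_lock(&demo_sleeping_lock);         /* may be preempted on -rt */
        demo_count++;
        spin_unlock(&demo_sleeping_lock);

        spin_lock(&demo_raw_lock);              /* stays truly atomic */
        demo_count++;
        spin_unlock(&demo_raw_lock);
}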
--Darren
> there are some strong requirements that anything within that lock is not
> preempted, so zap_pte_range() is the obvious one, but all of them would
> need to be addressed.
>
> Ben.
>
>
> So as Paul mentioned, spin_lock is now a mutex. There is a new
> raw_spinlock however (simply change the way it is declared, calling
> conventions are the same) which is used in a very few areas where a
> traditional spin_lock is truly necessary. This may or may not be one of
> those times, but I wanted to point it out.
Yeah, I figured that. My main worry has more to do with some fishy
assumptions the powerpc VM code makes regarding what can and cannot
happen in those locked sections, among other things. I'll have to sit
and think about it for a little while to convince myself we are ok ...
or not. Plus we do keep track of various MM related things in per-CPU
data structures but it looks like Paul already spotted that.
Cheers,
Ben.
On Thu, Nov 01, 2007 at 08:15:28AM +1100, Benjamin Herrenschmidt wrote:
>
> > So as Paul mentioned, spin_lock is now a mutex. There is a new
> > raw_spinlock however (simply change the way it is declared, calling
> > conventions are the same) which is used in a very few areas where a
> > traditional spin_lock is truly necessary. This may or may not be one of
> > those times, but I wanted to point it out.
>
> Yeah, I figured that. My main worry has more to do with some fishy
> assumptions the powerpc VM code makes regarding what can and cannot
> happen in those locked sections, among other things. I'll have to sit
> and think about it for a little while to convince myself we are ok ...
> or not. Plus we do keep track of various MM related things in per-CPU
> data structures but it looks like Paul already spotted that.
My concern would be that I failed to spot all of them. ;-)
Thanx, Paul
On Mon, 29 Oct 2007, Paul E. McKenney wrote:
> diff -urpNa -X dontdiff linux-2.6.23.1-rt4/mm/memory.c linux-2.6.23.1-rt4-fix/mm/memory.c
> --- linux-2.6.23.1-rt4/mm/memory.c 2007-10-27 22:20:57.000000000 -0700
> +++ linux-2.6.23.1-rt4-fix/mm/memory.c 2007-10-28 15:40:36.000000000 -0700
> @@ -664,6 +664,7 @@ static unsigned long zap_pte_range(struc
> int anon_rss = 0;
>
> pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
> + preempt_disable();
> arch_enter_lazy_mmu_mode();
> do {
> pte_t ptent = *pte;
> @@ -732,6 +733,7 @@ static unsigned long zap_pte_range(struc
>
> add_mm_rss(mm, file_rss, anon_rss);
> arch_leave_lazy_mmu_mode();
> + preempt_enable();
> pte_unmap_unlock(pte - 1, ptl);
>
> return addr;
I'm pulling your patch for the above added code. Took me a few hours to
find the culprit, but I was getting scheduling-while-atomic bugs. Turns out
that the code you wrapped in "preempt_disable" calls sleeping spinlocks.
Might want to run with DEBUG_PREEMPT.
Thanks,
-- Steve
On Wed, Dec 12, 2007 at 10:56:12PM -0500, Steven Rostedt wrote:
>
> On Mon, 29 Oct 2007, Paul E. McKenney wrote:
> > diff -urpNa -X dontdiff linux-2.6.23.1-rt4/mm/memory.c linux-2.6.23.1-rt4-fix/mm/memory.c
> > --- linux-2.6.23.1-rt4/mm/memory.c 2007-10-27 22:20:57.000000000 -0700
> > +++ linux-2.6.23.1-rt4-fix/mm/memory.c 2007-10-28 15:40:36.000000000 -0700
> > @@ -664,6 +664,7 @@ static unsigned long zap_pte_range(struc
> > int anon_rss = 0;
> >
> > pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
> > + preempt_disable();
> > arch_enter_lazy_mmu_mode();
> > do {
> > pte_t ptent = *pte;
> > @@ -732,6 +733,7 @@ static unsigned long zap_pte_range(struc
> >
> > add_mm_rss(mm, file_rss, anon_rss);
> > arch_leave_lazy_mmu_mode();
> > + preempt_enable();
> > pte_unmap_unlock(pte - 1, ptl);
> >
> > return addr;
>
> I'm pulling your patch for the above added code. Took me a few hours to
> find the culprit, but I was getting scheduling-while-atomic bugs. Turns out
> that the code you wrapped in "preempt_disable" calls sleeping spinlocks.
>
> Might want to run with DEBUG_PREEMPT.
I thought that you had already pulled the above version...
Here is the replacement that I posted on November 9th (with much help
from Ben H):
http://lkml.org/lkml/2007/11/9/114
Thanx, Paul
Signed-off-by: Paul E. McKenney <[email protected]>
---
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/kernel/process.c linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/process.c
--- linux-2.6.23.1-rt4/arch/powerpc/kernel/process.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/process.c 2007-11-12 09:18:55.000000000 -0800
@@ -245,6 +245,10 @@ struct task_struct *__switch_to(struct t
struct thread_struct *new_thread, *old_thread;
unsigned long flags;
struct task_struct *last;
+#ifdef CONFIG_PREEMPT_RT
+ struct ppc64_tlb_batch *batch;
+ int hadbatch;
+#endif /* #ifdef CONFIG_PREEMPT_RT */
#ifdef CONFIG_SMP
/* avoid complexity of lazy save/restore of fpu
@@ -325,6 +329,17 @@ struct task_struct *__switch_to(struct t
}
#endif
+#ifdef CONFIG_PREEMPT_RT
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ if (batch->active) {
+ hadbatch = 1;
+ if (batch->index) {
+ __flush_tlb_pending(batch);
+ }
+ batch->active = 0;
+ }
+#endif /* #ifdef CONFIG_PREEMPT_RT */
+
local_irq_save(flags);
account_system_vtime(current);
@@ -335,6 +350,13 @@ struct task_struct *__switch_to(struct t
local_irq_restore(flags);
+#ifdef CONFIG_PREEMPT_RT
+ if (hadbatch) {
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ batch->active = 1;
+ }
+#endif /* #ifdef CONFIG_PREEMPT_RT */
+
return last;
}
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c
--- linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c 2007-10-28 13:37:23.000000000 -0700
@@ -80,7 +80,7 @@ struct boot_param_header *initial_boot_p
extern struct device_node *allnodes; /* temporary while merging */
-extern rwlock_t devtree_lock; /* temporary while merging */
+extern raw_rwlock_t devtree_lock; /* temporary while merging */
/* export that to outside world */
struct device_node *of_chosen;
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c
--- linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c 2007-10-27 22:20:57.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c 2007-11-08 16:49:04.000000000 -0800
@@ -133,7 +133,7 @@ void pgtable_free_tlb(struct mmu_gather
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
unsigned long vsid, vaddr;
unsigned int psize;
real_pte_t rpte;
@@ -180,6 +180,7 @@ void hpte_need_flush(struct mm_struct *m
*/
if (!batch->active) {
flush_hash_page(vaddr, rpte, psize, 0);
+ put_cpu_var(ppc64_tlb_batch);
return;
}
@@ -212,12 +213,14 @@ void hpte_need_flush(struct mm_struct *m
*/
if (machine_is(celleb)) {
__flush_tlb_pending(batch);
+ put_cpu_var(ppc64_tlb_batch);
return;
}
#endif /* CONFIG_PREEMPT_RT */
if (i >= PPC64_TLB_BATCH_NR)
__flush_tlb_pending(batch);
+ put_cpu_var(ppc64_tlb_batch);
}
/*
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c
--- linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c 2007-10-28 15:43:54.000000000 -0700
@@ -97,7 +97,7 @@ int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
/* Lock to avoid races due to multiple reports of an error */
-static DEFINE_SPINLOCK(confirm_error_lock);
+static DEFINE_RAW_SPINLOCK(confirm_error_lock);
/* Buffer for reporting slot-error-detail rtas calls. Its here
* in BSS, and not dynamically alloced, so that it ends up in
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/drivers/of/base.c linux-2.6.23.1-rt4-fix/drivers/of/base.c
--- linux-2.6.23.1-rt4/drivers/of/base.c 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/drivers/of/base.c 2007-10-28 13:38:36.000000000 -0700
@@ -25,7 +25,7 @@ struct device_node *allnodes;
/* use when traversing tree through the allnext, child, sibling,
* or parent members of struct device_node.
*/
-DEFINE_RWLOCK(devtree_lock);
+DEFINE_RAW_RWLOCK(devtree_lock);
int of_n_addr_cells(struct device_node *np)
{
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h
--- linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h 2007-11-08 17:11:18.000000000 -0800
@@ -109,18 +109,23 @@ extern void hpte_need_flush(struct mm_st
static inline void arch_enter_lazy_mmu_mode(void)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
batch->active = 1;
+ put_cpu_var(ppc64_tlb_batch);
}
static inline void arch_leave_lazy_mmu_mode(void)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
- if (batch->index)
- __flush_tlb_pending(batch);
- batch->active = 0;
+ if (batch->active) {
+ if (batch->index) {
+ __flush_tlb_pending(batch);
+ }
+ batch->active = 0;
+ }
+ put_cpu_var(ppc64_tlb_batch);
}
#define arch_flush_lazy_mmu_mode() do {} while (0)
diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h
--- linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h 2007-10-12 09:43:44.000000000 -0700
+++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h 2007-10-28 11:36:05.000000000 -0700
@@ -44,8 +44,11 @@ static inline void tlb_flush(struct mmu_
* pages are going to be freed and we really don't want to have a CPU
* access a freed page because it has a stale TLB
*/
- if (tlbbatch->index)
+ if (tlbbatch->index) {
+ preempt_disable();
__flush_tlb_pending(tlbbatch);
+ preempt_enable();
+ }
pte_free_finish();
}
On Wed, 12 Dec 2007, Paul E. McKenney wrote:
> >
> > I'm pulling your patch for the above added code. Took me a few hours to
> > find the culprit, but I was getting scheduling-while-atomic bugs. Turns out
> > that the code you wrapped in "preempt_disable" calls sleeping spinlocks.
> >
> > Might want to run with DEBUG_PREEMPT.
>
> I thought that you had already pulled the above version...
>
> Here is the replacement that I posted on November 9th (with much help
> from Ben H):
>
> http://lkml.org/lkml/2007/11/9/114
OK, sorry, I somehow got the two reversed, and I think I replaced the new
one with the old one :-(
I blame the espresso!
>
> Signed-off-by: Paul E. McKenney <[email protected]>
OK, will apply to -rt14
Thanks,
-- Steve
On Thu, Dec 13, 2007 at 07:52:41AM -0500, Steven Rostedt wrote:
>
>
> On Wed, 12 Dec 2007, Paul E. McKenney wrote:
>
> > >
> > > I'm pulling your patch for the above added code. Took me a few hours to
> > > find the culprit, but I was getting scheduling-while-atomic bugs. Turns out
> > > that the code you wrapped in "preempt_disable" calls sleeping spinlocks.
> > >
> > > Might want to run with DEBUG_PREEMPT.
> >
> > I thought that you had already pulled the above version...
> >
> > Here is the replacement that I posted on November 9th (with much help
> > from Ben H):
> >
> > http://lkml.org/lkml/2007/11/9/114
>
> OK, sorry, I somehow got the two reversed, and I think I replaced the new
> one with the old one :-(
That sounds like something -I- would do!!! ;-)
> I blame the espresso!
If you give -me- espresso, you also have to give me a putty knife so that
I can scrape myself off of the ceiling!
> > Signed-off-by: Paul E. McKenney <[email protected]>
>
> OK, will apply to -rt14
Thank you!
Thanx, Paul