On an SMP MIPS32 2.6.30 system with cache aliases, I am seeing the
following sequence of events:
1) copy_user_highpage() runs on CPU0, invoking kmap_coherent() to create
a temporary mapping in the fixmap region
2) copy_page() starts on CPU0
3) CPU1 sends CPU0 an IPI asking CPU0 to run
local_r4k_flush_cache_page()
4) CPU0 takes the interrupt, interrupting copy_page()
5) local_r4k_flush_cache_page() on CPU0 calls kmap_coherent() again
6) The second invocation of kmap_coherent() on CPU0 tries to use the
same fixmap virtual address that was being used by copy_user_highpage()
7) CPU0 throws a machine check exception for the TLB address conflict
Here is my proposed fix:
a) kmap_coherent() will maintain a flag for each CPU indicating whether
there is an active mapping (kmap_coherent_inuse)
b) kmap_coherent() will return a NULL address in the unlikely case that
it was called while another mapping was already outstanding
c) local_r4k_flush_cache_page() will check for a NULL return value from
kmap_coherent(), and compensate by using indexed cacheops instead of hit
cacheops
Signed-off-by: Kevin Cernekee <[email protected]>
---
arch/mips/mm/c-r4k.c | 25 +++++++++++++++++++------
arch/mips/mm/init.c | 17 +++++++++++++++--
2 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 6721ee2..572fe7e 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -459,7 +459,7 @@ static inline void local_r4k_flush_cache_page(void *args)
struct page *page = pfn_to_page(fcp_args->pfn);
int exec = vma->vm_flags & VM_EXEC;
struct mm_struct *mm = vma->vm_mm;
- int map_coherent = 0;
+ int map_coherent = 0, use_indexed = 0;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
@@ -499,13 +499,23 @@ static inline void local_r4k_flush_cache_page(void *args)
vaddr = kmap_coherent(page, addr);
else
vaddr = kmap_atomic(page, KM_USER0);
- addr = (unsigned long)vaddr;
+
+ if (unlikely(vaddr == NULL))
+ use_indexed = 1;
+ else
+ addr = (unsigned long)vaddr;
}
if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) {
- r4k_blast_dcache_page(addr);
- if (exec && !cpu_icache_snoops_remote_store)
- r4k_blast_scache_page(addr);
+ if (use_indexed) {
+ r4k_blast_dcache_page_indexed(addr);
+ if (exec && !cpu_icache_snoops_remote_store)
+ r4k_blast_scache_page_indexed(addr);
+ } else {
+ r4k_blast_dcache_page(addr);
+ if (exec && !cpu_icache_snoops_remote_store)
+ r4k_blast_scache_page(addr);
+ }
}
if (exec) {
if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) {
@@ -514,7 +524,10 @@ static inline void local_r4k_flush_cache_page(void *args)
if (cpu_context(cpu, mm) != 0)
drop_mmu_context(mm, cpu);
} else
- r4k_blast_icache_page(addr);
+ if (use_indexed)
+ r4k_blast_icache_page_indexed(addr);
+ else
+ r4k_blast_icache_page(addr);
}
if (vaddr) {
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 0e82050..87967cd 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -31,6 +31,7 @@
#include <asm/asm-offsets.h>
#include <asm/bootinfo.h>
#include <asm/cachectl.h>
+#include <asm/cache.h>
#include <asm/cpu.h>
#include <asm/dma.h>
#include <asm/kmap_types.h>
@@ -105,6 +106,8 @@ unsigned long setup_zero_pages(void)
return 1UL << order;
}
+static int kmap_coherent_inuse[NR_CPUS] ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_MIPS_MT_SMTC
static pte_t *kmap_coherent_pte;
static void __init kmap_coherent_init(void)
@@ -125,14 +128,15 @@ void *kmap_coherent(struct page *page, unsigned long addr)
unsigned long vaddr, flags, entrylo;
unsigned long old_ctx;
pte_t pte;
- int tlbidx;
+ int tlbidx, cpu;
BUG_ON(Page_dcache_dirty(page));
inc_preempt_count();
+ cpu = smp_processor_id();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
#ifdef CONFIG_MIPS_MT_SMTC
- idx += FIX_N_COLOURS * smp_processor_id();
+ idx += FIX_N_COLOURS * cpu;
#endif
vaddr = __fix_to_virt(FIX_CMAP_END - idx);
pte = mk_pte(page, PAGE_KERNEL);
@@ -143,6 +147,13 @@ void *kmap_coherent(struct page *page, unsigned long addr)
#endif
ENTER_CRITICAL(flags);
+ if (unlikely(kmap_coherent_inuse[cpu] != 0)) {
+ vaddr = 0;
+ dec_preempt_count();
+ goto out;
+ }
+ kmap_coherent_inuse[cpu] = 1;
+
old_ctx = read_c0_entryhi();
write_c0_entryhi(vaddr & (PAGE_MASK << 1));
write_c0_entrylo0(entrylo);
@@ -168,6 +179,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
#endif
tlbw_use_hazard();
write_c0_entryhi(old_ctx);
+out:
EXIT_CRITICAL(flags);
return (void*) vaddr;
@@ -195,6 +207,7 @@ void kunmap_coherent(void)
write_c0_entryhi(old_ctx);
EXIT_CRITICAL(flags);
#endif
+ kmap_coherent_inuse[smp_processor_id()] = 0;
dec_preempt_count();
preempt_check_resched();
}
--
1.6.3.1
On Sat, Sep 05, 2009 at 02:38:41PM -0700, Kevin Cernekee wrote:
> On an SMP MIPS32 2.6.30 system with cache aliases, I am seeing the
> following sequence of events:
>
> 1) copy_user_highpage() runs on CPU0, invoking kmap_coherent() to create
> a temporary mapping in the fixmap region
>
> 2) copy_page() starts on CPU0
>
> 3) CPU1 sends CPU0 an IPI asking CPU0 to run
> local_r4k_flush_cache_page()
>
> 4) CPU0 takes the interrupt, interrupting copy_page()
>
> 5) local_r4k_flush_cache_page() on CPU0 calls kmap_coherent() again
>
> 6) The second invocation of kmap_coherent() on CPU0 tries to use the
> same fixmap virtual address that was being used by copy_user_highpage()
>
> 7) CPU0 throws a machine check exception for the TLB address conflict
Nice analysis!
> Here is my proposed fix:
>
> a) kmap_coherent() will maintain a flag for each CPU indicating whether
> there is an active mapping (kmap_coherent_inuse)
>
> b) kmap_coherent() will return a NULL address in the unlikely case that
> it was called while another mapping was already outstanding
>
> c) local_r4k_flush_cache_page() will check for a NULL return value from
> kmap_coherent(), and compensate by using indexed cacheops instead of hit
> cacheops
Too complicated. The fault is happening because the non-SMTC code is trying
to be terribly clever and pre-loading the TLB with a new wired entry. On
SMTC, where multiple processors share a single TLB, a more careful
approach is needed, so the code does a TLB probe and carefully re-uses
an existing entry, if any. Which happens to be just what we need.
So how about the patch below — only compile-tested so far?
Signed-off-by: Ralf Baechle <[email protected]>
arch/mips/mm/init.c | 35 +----------------------------------
1 files changed, 1 insertions(+), 34 deletions(-)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 0e82050..ab11582 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -105,8 +105,8 @@ unsigned long setup_zero_pages(void)
return 1UL << order;
}
-#ifdef CONFIG_MIPS_MT_SMTC
static pte_t *kmap_coherent_pte;
+
static void __init kmap_coherent_init(void)
{
unsigned long vaddr;
@@ -115,9 +115,6 @@ static void __init kmap_coherent_init(void)
vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
}
-#else
-static inline void kmap_coherent_init(void) {}
-#endif
void *kmap_coherent(struct page *page, unsigned long addr)
{
@@ -131,9 +128,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
inc_preempt_count();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
-#ifdef CONFIG_MIPS_MT_SMTC
idx += FIX_N_COLOURS * smp_processor_id();
-#endif
vaddr = __fix_to_virt(FIX_CMAP_END - idx);
pte = mk_pte(page, PAGE_KERNEL);
#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
@@ -147,7 +142,6 @@ void *kmap_coherent(struct page *page, unsigned long addr)
write_c0_entryhi(vaddr & (PAGE_MASK << 1));
write_c0_entrylo0(entrylo);
write_c0_entrylo1(entrylo);
-#ifdef CONFIG_MIPS_MT_SMTC
set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte);
/* preload TLB instead of local_flush_tlb_one() */
mtc0_tlbw_hazard();
@@ -159,13 +153,6 @@ void *kmap_coherent(struct page *page, unsigned long addr)
tlb_write_random();
else
tlb_write_indexed();
-#else
- tlbidx = read_c0_wired();
- write_c0_wired(tlbidx + 1);
- write_c0_index(tlbidx);
- mtc0_tlbw_hazard();
- tlb_write_indexed();
-#endif
tlbw_use_hazard();
write_c0_entryhi(old_ctx);
EXIT_CRITICAL(flags);
@@ -177,24 +164,6 @@ void *kmap_coherent(struct page *page, unsigned long addr)
void kunmap_coherent(void)
{
-#ifndef CONFIG_MIPS_MT_SMTC
- unsigned int wired;
- unsigned long flags, old_ctx;
-
- ENTER_CRITICAL(flags);
- old_ctx = read_c0_entryhi();
- wired = read_c0_wired() - 1;
- write_c0_wired(wired);
- write_c0_index(wired);
- write_c0_entryhi(UNIQUE_ENTRYHI(wired));
- write_c0_entrylo0(0);
- write_c0_entrylo1(0);
- mtc0_tlbw_hazard();
- tlb_write_indexed();
- tlbw_use_hazard();
- write_c0_entryhi(old_ctx);
- EXIT_CRITICAL(flags);
-#endif
dec_preempt_count();
preempt_check_resched();
}
@@ -260,7 +229,6 @@ void copy_from_user_page(struct vm_area_struct *vma,
void __init fixrange_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_MIPS_MT_SMTC)
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -290,7 +258,6 @@ void __init fixrange_init(unsigned long start, unsigned long end,
}
j = 0;
}
-#endif
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
Create an extra set of fixmap entries for use in interrupt handlers.
This prevents fixmap VA conflicts between copy_user_highpage() running
in user context, and local_r4k_flush_cache_page() invoked from an SMP
IPI.
Signed-off-by: Kevin Cernekee <[email protected]>
---
arch/mips/include/asm/fixmap.h | 4 ++--
arch/mips/mm/init.c | 6 +++++-
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h
index 0f5caa1..dd924b9 100644
--- a/arch/mips/include/asm/fixmap.h
+++ b/arch/mips/include/asm/fixmap.h
@@ -48,9 +48,9 @@ enum fixed_addresses {
#define FIX_N_COLOURS 8
FIX_CMAP_BEGIN,
#ifdef CONFIG_MIPS_MT_SMTC
- FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS),
+ FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS * 2),
#else
- FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS,
+ FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * 2),
#endif
#ifdef CONFIG_HIGHMEM
/* reserved pte's for temporary kernel mappings */
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 0e82050..43ebe65 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/pfn.h>
+#include <linux/hardirq.h>
#include <asm/asm-offsets.h>
#include <asm/bootinfo.h>
@@ -132,7 +133,10 @@ void *kmap_coherent(struct page *page, unsigned long addr)
inc_preempt_count();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
#ifdef CONFIG_MIPS_MT_SMTC
- idx += FIX_N_COLOURS * smp_processor_id();
+ idx += FIX_N_COLOURS * smp_processor_id() +
+ (in_interrupt() ? (FIX_N_COLOURS * NR_CPUS) : 0);
+#else
+ idx += in_interrupt() ? FIX_N_COLOURS : 0;
#endif
vaddr = __fix_to_virt(FIX_CMAP_END - idx);
pte = mk_pte(page, PAGE_KERNEL);
--
1.6.3.1
On Mon, Sep 7, 2009 at 3:26 AM, Ralf Baechle<[email protected]> wrote:
> Too complicated. The fault is happening because the non-SMTC code is trying
> to be terribly clever and pre-loading the TLB with a new wired entry. On
> SMTC, where multiple processors share a single TLB, a more careful
> approach is needed, so the code does a TLB probe and carefully re-uses
> an existing entry, if any. Which happens to be just what we need.
>
> So how about the patch below — only compile-tested so far?
That is an interesting idea. However, I am not sure we want the IPI
ISR to overwrite copy_user_highpage()'s TLB entry. That means that
when the interrupt returns, our coherent mapping will likely point to
a different page. It will avoid the machine check exception, but it
will potentially cause silent, intermittent data corruption instead.
Taking another cue from the SMTC implementation, though - my v2 patch
adds an extra set of fixmap addresses for the in_interrupt() case,
avoiding the VA conflict entirely. What do you think?
I tested this scheme on non-SMTC. I suspect that the same change is
required for MT + MP cores like the 1004K, but probably not MT only
cores like 34K which don't use cacheop IPIs.