2009-03-09 12:56:34

by Aaro Koskinen

Subject: [RFC PATCH 0/2] mm: tlb: unmap scalability

Hello,

Here's a patch proposal to make unmap scale linearly on architectures
that implement tlb_start_vma() and tlb_end_vma(), by adding range
parameters. See <http://marc.info/?l=linux-kernel&m=123610437815468&w=2>
for a description of the current problem.

The first patch only adds the new parameters. The second one changes the
ARM architecture to use those parameters. A similar change should, of
course, also be made for the other architectures implementing those routines.
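
In a nutshell, the change adds the unmapped range to the per-VMA hooks
(a sketch of the interface change from patch 1/2 below; range_start and
range_end are the parameter names used there):

	/* before */
	#define tlb_start_vma(tlb, vma)	do { } while (0)
	#define tlb_end_vma(tlb, vma)	do { } while (0)

	/* after: an architecture may flush just [range_start, range_end) */
	#define tlb_start_vma(tlb, vma, range_start, range_end)	do { } while (0)
	#define tlb_end_vma(tlb, vma, range_start, range_end)	do { } while (0)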

The patch was made for 2.6.29-rc7.

A.


2009-03-09 13:00:40

by Aaro Koskinen

Subject: [RFC PATCH 1/2] mm: tlb: Add range to tlb_start_vma() and tlb_end_vma()

Pass the range to be torn down with tlb_start_vma() and
tlb_end_vma(). This allows architectures doing per-VMA handling to flush
only the needed range instead of the full VMA region.

This patch changes the interface only, no changes in functionality.

Signed-off-by: Aaro Koskinen <[email protected]>
---
arch/alpha/include/asm/tlb.h | 4 ++--
arch/arm/include/asm/tlb.h | 6 ++++--
arch/avr32/include/asm/tlb.h | 4 ++--
arch/blackfin/include/asm/tlb.h | 4 ++--
arch/cris/include/asm/tlb.h | 4 ++--
arch/ia64/include/asm/tlb.h | 8 ++++----
arch/m68k/include/asm/tlb.h | 4 ++--
arch/mips/include/asm/tlb.h | 4 ++--
arch/parisc/include/asm/tlb.h | 4 ++--
arch/powerpc/include/asm/tlb.h | 4 ++--
arch/s390/include/asm/tlb.h | 4 ++--
arch/sh/include/asm/tlb.h | 4 ++--
arch/sparc/include/asm/tlb_32.h | 4 ++--
arch/sparc/include/asm/tlb_64.h | 4 ++--
arch/um/include/asm/tlb.h | 4 ++--
arch/x86/include/asm/tlb.h | 4 ++--
arch/xtensa/include/asm/tlb.h | 8 ++++----
include/asm-frv/tlb.h | 4 ++--
include/asm-m32r/tlb.h | 4 ++--
include/asm-mn10300/tlb.h | 4 ++--
mm/memory.c | 10 +++++++---
21 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h
index c136365..26991bc 100644
--- a/arch/alpha/include/asm/tlb.h
+++ b/arch/alpha/include/asm/tlb.h
@@ -1,8 +1,8 @@
#ifndef _ALPHA_TLB_H
#define _ALPHA_TLB_H

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)

#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 857f1df..d10c9c3 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -71,14 +71,16 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
* the vmas are adjusted to only cover the region to be torn down.
*/
static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long range_start, unsigned long range_end)
{
if (!tlb->fullmm)
flush_cache_range(vma, vma->vm_start, vma->vm_end);
}

static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long range_start, unsigned long range_end)
{
if (!tlb->fullmm)
flush_tlb_range(vma, vma->vm_start, vma->vm_end);
diff --git a/arch/avr32/include/asm/tlb.h b/arch/avr32/include/asm/tlb.h
index 5c55f9c..41381e9 100644
--- a/arch/avr32/include/asm/tlb.h
+++ b/arch/avr32/include/asm/tlb.h
@@ -8,10 +8,10 @@
#ifndef __ASM_AVR32_TLB_H
#define __ASM_AVR32_TLB_H

-#define tlb_start_vma(tlb, vma) \
+#define tlb_start_vma(tlb, vma, range_start, range_end) \
flush_cache_range(vma, vma->vm_start, vma->vm_end)

-#define tlb_end_vma(tlb, vma) \
+#define tlb_end_vma(tlb, vma, range_start, range_end) \
flush_tlb_range(vma, vma->vm_start, vma->vm_end)

#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0)
diff --git a/arch/blackfin/include/asm/tlb.h b/arch/blackfin/include/asm/tlb.h
index 89a12ee..cf7eb67 100644
--- a/arch/blackfin/include/asm/tlb.h
+++ b/arch/blackfin/include/asm/tlb.h
@@ -1,8 +1,8 @@
#ifndef _BLACKFIN_TLB_H
#define _BLACKFIN_TLB_H

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

/*
diff --git a/arch/cris/include/asm/tlb.h b/arch/cris/include/asm/tlb.h
index 77384ea..87e9879 100644
--- a/arch/cris/include/asm/tlb.h
+++ b/arch/cris/include/asm/tlb.h
@@ -9,8 +9,8 @@
* cris doesn't need any special per-pte or
* per-vma handling..
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 20d8a39..b1c7bbf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -25,14 +25,14 @@
* tlb <- tlb_gather_mmu(mm, full_mm_flush); // start unmap for address space MM
* {
* for each vma that needs a shootdown do {
- * tlb_start_vma(tlb, vma);
+ * tlb_start_vma(tlb, vma, range_start, range_end);
* for each page-table-entry PTE that needs to be removed do {
* tlb_remove_tlb_entry(tlb, pte, address);
* if (pte refers to a normal page) {
* tlb_remove_page(tlb, page);
* }
* }
- * tlb_end_vma(tlb, vma);
+ * tlb_end_vma(tlb, vma, range_start, range_end);
* }
* }
* tlb_finish_mmu(tlb, start, end); // finish unmap for address space MM
@@ -227,8 +227,8 @@ __tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long addre

#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm)

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)

#define tlb_remove_tlb_entry(tlb, ptep, addr) \
do { \
diff --git a/arch/m68k/include/asm/tlb.h b/arch/m68k/include/asm/tlb.h
index 1785cff..0363248 100644
--- a/arch/m68k/include/asm/tlb.h
+++ b/arch/m68k/include/asm/tlb.h
@@ -5,8 +5,8 @@
* m68k doesn't need any special per-pte or
* per-vma handling..
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

/*
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index 80d9dfc..8491179 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -5,12 +5,12 @@
* MIPS doesn't need any special per-pte or per-vma handling, except
* we need to flush cache for area to be unmapped.
*/
-#define tlb_start_vma(tlb, vma) \
+#define tlb_start_vma(tlb, vma, range_start, range_end) \
do { \
if (!tlb->fullmm) \
flush_cache_range(vma, vma->vm_start, vma->vm_end); \
} while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

/*
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 383b1db..37c40e5 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -6,12 +6,12 @@ do { if ((tlb)->fullmm) \
flush_tlb_mm((tlb)->mm);\
} while (0)

-#define tlb_start_vma(tlb, vma) \
+#define tlb_start_vma(tlb, vma, range_start, range_end) \
do { if (!(tlb)->fullmm) \
flush_cache_range(vma, vma->vm_start, vma->vm_end); \
} while (0)

-#define tlb_end_vma(tlb, vma) \
+#define tlb_end_vma(tlb, vma, range_start, range_end) \
do { if (!(tlb)->fullmm) \
flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
} while (0)
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e20ff75..d7ab142 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -27,8 +27,8 @@

struct mmu_gather;

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)

#if !defined(CONFIG_PPC_STD_MMU)

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 3d8a96d..718d16f 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -148,8 +148,8 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
#endif
}

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
#define tlb_migrate_finish(mm) do { } while (0)

diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 88ff1ae..84ad1f9 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -7,10 +7,10 @@

#ifndef __ASSEMBLY__

-#define tlb_start_vma(tlb, vma) \
+#define tlb_start_vma(tlb, vma, range_start, range_end) \
flush_cache_range(vma, vma->vm_start, vma->vm_end)

-#define tlb_end_vma(tlb, vma) \
+#define tlb_end_vma(tlb, vma, range_start, range_end) \
flush_tlb_range(vma, vma->vm_start, vma->vm_end)

#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h
index 6d02d1c..8161627 100644
--- a/arch/sparc/include/asm/tlb_32.h
+++ b/arch/sparc/include/asm/tlb_32.h
@@ -1,12 +1,12 @@
#ifndef _SPARC_TLB_H
#define _SPARC_TLB_H

-#define tlb_start_vma(tlb, vma) \
+#define tlb_start_vma(tlb, vma, range_start, range_end) \
do { \
flush_cache_range(vma, vma->vm_start, vma->vm_end); \
} while (0)

-#define tlb_end_vma(tlb, vma) \
+#define tlb_end_vma(tlb, vma, range_start, range_end) \
do { \
flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
} while (0)
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index ec81cde..e5d121e 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
#define pud_free_tlb(tlb,pudp) __pud_free_tlb(tlb,pudp)

#define tlb_migrate_finish(mm) do { } while (0)
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)

#endif /* _SPARC64_TLB_H */
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 5240fa1..a2eafcc 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -7,8 +7,8 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)

/* struct mmu_gather is an opaque type used by the mm code for passing around
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 829215f..7421c06 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -1,8 +1,8 @@
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)

diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h
index 31c220f..8f99a8e 100644
--- a/arch/xtensa/include/asm/tlb.h
+++ b/arch/xtensa/include/asm/tlb.h
@@ -18,18 +18,18 @@

/* Note, read http://lkml.org/lkml/2004/1/15/6 */

-# define tlb_start_vma(tlb,vma) do { } while (0)
-# define tlb_end_vma(tlb,vma) do { } while (0)
+# define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+# define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)

#else

-# define tlb_start_vma(tlb, vma) \
+# define tlb_start_vma(tlb, vma, range_start, range_end) \
do { \
if (!tlb->fullmm) \
flush_cache_range(vma, vma->vm_start, vma->vm_end); \
} while(0)

-# define tlb_end_vma(tlb, vma) \
+# define tlb_end_vma(tlb, vma, range_start, range_end) \
do { \
if (!tlb->fullmm) \
flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
diff --git a/include/asm-frv/tlb.h b/include/asm-frv/tlb.h
index cd458eb..8553784 100644
--- a/include/asm-frv/tlb.h
+++ b/include/asm-frv/tlb.h
@@ -12,8 +12,8 @@ extern void check_pgt_cache(void);
/*
* we don't need any special per-pte or per-vma handling...
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

/*
diff --git a/include/asm-m32r/tlb.h b/include/asm-m32r/tlb.h
index c7ebd8d..3f4c08d 100644
--- a/include/asm-m32r/tlb.h
+++ b/include/asm-m32r/tlb.h
@@ -5,8 +5,8 @@
* x86 doesn't need any special per-pte or
* per-vma handling..
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)

/*
diff --git a/include/asm-mn10300/tlb.h b/include/asm-mn10300/tlb.h
index 65d232b..89acf74 100644
--- a/include/asm-mn10300/tlb.h
+++ b/include/asm-mn10300/tlb.h
@@ -19,8 +19,8 @@ extern void check_pgt_cache(void);
/*
* we don't need any special per-pte or per-vma handling...
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_start_vma(tlb, vma, range_start, range_end) do { } while (0)
+#define tlb_end_vma(tlb, vma, range_start, range_end) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

/*
diff --git a/mm/memory.c b/mm/memory.c
index baa999e..44996b6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -896,17 +896,21 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,

static unsigned long unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
+ unsigned long range_start, unsigned long end,
long *zap_work, struct zap_details *details)
{
pgd_t *pgd;
unsigned long next;
+ unsigned long addr = range_start;
+ unsigned long range_end;

if (details && !details->check_mapping && !details->nonlinear_vma)
details = NULL;

BUG_ON(addr >= end);
- tlb_start_vma(tlb, vma);
+ BUG_ON(*zap_work <= 0);
+ range_end = addr + min(end - addr, (unsigned long)*zap_work);
+ tlb_start_vma(tlb, vma, range_start, range_end);
pgd = pgd_offset(vma->vm_mm, addr);
do {
next = pgd_addr_end(addr, end);
@@ -917,7 +921,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
next = zap_pud_range(tlb, vma, pgd, addr, next,
zap_work, details);
} while (pgd++, addr = next, (addr != end && *zap_work > 0));
- tlb_end_vma(tlb, vma);
+ tlb_end_vma(tlb, vma, range_start, range_end);

return addr;
}
--
1.5.4.3

2009-03-09 13:00:54

by Aaro Koskinen

Subject: [RFC PATCH 2/2] ARM: tlb: Use range in tlb_start_vma() and tlb_end_vma()

Flush only the pages that were unmapped.

Signed-off-by: Aaro Koskinen <[email protected]>
---
arch/arm/include/asm/tlb.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index d10c9c3..a034b6d 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -68,14 +68,14 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
/*
* In the case of tlb vma handling, we can optimise these away in the
* case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
+ * the range is adjusted to only cover the region to be torn down.
*/
static inline void
tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long range_start, unsigned long range_end)
{
if (!tlb->fullmm)
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
+ flush_cache_range(vma, range_start, range_end);
}

static inline void
@@ -83,7 +83,7 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long range_start, unsigned long range_end)
{
if (!tlb->fullmm)
- flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+ flush_tlb_range(vma, range_start, range_end);
}

#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page)
--
1.5.4.3

2009-03-09 14:16:49

by Hugh Dickins

Subject: Re: [RFC PATCH 1/2] mm: tlb: Add range to tlb_start_vma() and tlb_end_vma()

On Mon, 9 Mar 2009, Aaro Koskinen wrote:

> Pass the range to be torn down with tlb_start_vma() and
> tlb_end_vma(). This allows architectures doing per-VMA handling to flush
> only the needed range instead of the full VMA region.
>
> This patch changes the interface only, no changes in functionality.
>
> Signed-off-by: Aaro Koskinen <[email protected]>
> ---
> arch/alpha/include/asm/tlb.h | 4 ++--
> arch/arm/include/asm/tlb.h | 6 ++++--
> arch/avr32/include/asm/tlb.h | 4 ++--
> arch/blackfin/include/asm/tlb.h | 4 ++--
> arch/cris/include/asm/tlb.h | 4 ++--
> arch/ia64/include/asm/tlb.h | 8 ++++----
> arch/m68k/include/asm/tlb.h | 4 ++--
> arch/mips/include/asm/tlb.h | 4 ++--
> arch/parisc/include/asm/tlb.h | 4 ++--
> arch/powerpc/include/asm/tlb.h | 4 ++--
> arch/s390/include/asm/tlb.h | 4 ++--
> arch/sh/include/asm/tlb.h | 4 ++--
> arch/sparc/include/asm/tlb_32.h | 4 ++--
> arch/sparc/include/asm/tlb_64.h | 4 ++--
> arch/um/include/asm/tlb.h | 4 ++--
> arch/x86/include/asm/tlb.h | 4 ++--
> arch/xtensa/include/asm/tlb.h | 8 ++++----
> include/asm-frv/tlb.h | 4 ++--
> include/asm-m32r/tlb.h | 4 ++--
> include/asm-mn10300/tlb.h | 4 ++--
> mm/memory.c | 10 +++++++---
> 21 files changed, 53 insertions(+), 47 deletions(-)
...
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -896,17 +896,21 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
>
> static unsigned long unmap_page_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma,
> - unsigned long addr, unsigned long end,
> + unsigned long range_start, unsigned long end,
> long *zap_work, struct zap_details *details)
> {
> pgd_t *pgd;
> unsigned long next;
> + unsigned long addr = range_start;
> + unsigned long range_end;
>
> if (details && !details->check_mapping && !details->nonlinear_vma)
> details = NULL;
>
> BUG_ON(addr >= end);
> - tlb_start_vma(tlb, vma);
> + BUG_ON(*zap_work <= 0);
> + range_end = addr + min(end - addr, (unsigned long)*zap_work);
> + tlb_start_vma(tlb, vma, range_start, range_end);
> pgd = pgd_offset(vma->vm_mm, addr);
> do {
> next = pgd_addr_end(addr, end);
> @@ -917,7 +921,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
> next = zap_pud_range(tlb, vma, pgd, addr, next,
> zap_work, details);
> } while (pgd++, addr = next, (addr != end && *zap_work > 0));
> - tlb_end_vma(tlb, vma);
> + tlb_end_vma(tlb, vma, range_start, range_end);
>
> return addr;
> }
> --

Sorry, I don't like this second-guessing of zap_work at all (okay,
we all hate zap_work, and would love to rework the tlb mmu_gather
stuff to be preemptible, but the file truncation case has so far
discouraged us).

Take a look at the levels below, in particular zap_pte_range(),
and you'll see that zap_work is just an approximate cap upon the
amount of work being done while zapping, and is decremented by
wildly different amounts if a pte (or swap entry) is there or not.
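
(For reference, the decrements in question, roughly as they appear in
2.6.29's zap_pte_range() - paraphrased here, not quoted verbatim:)

	do {
		pte_t ptent = *pte;

		if (pte_none(ptent)) {
			(*zap_work)--;		/* empty slot: costs 1 */
			continue;
		}
		(*zap_work) -= PAGE_SIZE;	/* pte or swap entry present:
						   costs PAGE_SIZE */
		/* ... the actual zapping ... */
	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));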

So the range_end you calculate will usually be misleadingly
different from the actual end of the range.

I don't see that you need to change the interface and other arches
at all. What prevents ARM from noting the first and last addresses
freed in its struct mmu_gather when tlb_remove_tlb_entry() is called
(see arch/um/include/asm/tlb.h for an example of that), then using
that in its tlb_end_vma() TLB flushing?

Admittedly you won't know the end for cache flushing in tlb_start_vma(),
but you haven't mentioned that one as a problem, and I expect you can
devise (ARM-specific) optimizations to avoid repetition there too.

Hugh

2009-03-09 17:01:00

by Aaro Koskinen

Subject: Re: [RFC PATCH 1/2] mm: tlb: Add range to tlb_start_vma() and tlb_end_vma()

Hello,

Hugh Dickins wrote:
> On Mon, 9 Mar 2009, Aaro Koskinen wrote:
>> Pass the range to be torn down with tlb_start_vma() and
>> tlb_end_vma(). This allows architectures doing per-VMA handling to flush
>> only the needed range instead of the full VMA region.
[...]
>> static unsigned long unmap_page_range(struct mmu_gather *tlb,
>> struct vm_area_struct *vma,
>> - unsigned long addr, unsigned long end,
>> + unsigned long range_start, unsigned long end,
>> long *zap_work, struct zap_details *details)
>> {
>> pgd_t *pgd;
>> unsigned long next;
>> + unsigned long addr = range_start;
>> + unsigned long range_end;
>>
>> if (details && !details->check_mapping && !details->nonlinear_vma)
>> details = NULL;
>>
>> BUG_ON(addr >= end);
>> - tlb_start_vma(tlb, vma);
>> + BUG_ON(*zap_work <= 0);
>> + range_end = addr + min(end - addr, (unsigned long)*zap_work);
>> + tlb_start_vma(tlb, vma, range_start, range_end);
>> pgd = pgd_offset(vma->vm_mm, addr);
>> do {
>> next = pgd_addr_end(addr, end);
>> @@ -917,7 +921,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
>> next = zap_pud_range(tlb, vma, pgd, addr, next,
>> zap_work, details);
>> } while (pgd++, addr = next, (addr != end && *zap_work > 0));
>> - tlb_end_vma(tlb, vma);
>> + tlb_end_vma(tlb, vma, range_start, range_end);
>>
>> return addr;
>> }
>
> Sorry, I don't like this second-guessing of zap_work at all (okay,
> we all hate zap_work, and would love to rework the tlb mmu_gather
> stuff to be preemptible, but the file truncation case has so far
> discouraged us).
>
> Take a look at the levels below, in particular zap_pte_range(),
> and you'll see that zap_work is just an approximate cap upon the
> amount of work being done while zapping, and is decremented by
> wildly different amounts if a pte (or swap entry) is there or not.
>
> So the range_end you calculate will usually be misleadingly
> different from the actual end of the range.

You are right. Somehow I assumed it would simply define the maximum
range in bytes, but I now realize it does not. So the range calculation
is totally wrong. For tlb_end_vma(), though, the range end would be
available in addr, but that is probably irrelevant because of what you said:

> I don't see that you need to change the interface and other arches
> at all. What prevents ARM from noting the first and last addresses
> freed in its struct mmu_gather when tlb_remove_tlb_entry() is called
> (see arch/um/include/asm/tlb.h for an example of that), then using
> that in its tlb_end_vma() TLB flushing?

This would probably work, thanks for pointing it out. I should have
taken a better look at the full API, not just at what was implemented in ARM.

So, there's a new ARM-only patch draft below based on this idea, also
adding linux-arm-kernel back to the recipients.

> Admittedly you won't know the end for cache flushing in tlb_start_vma(),
> but you haven't mentioned that one as a problem, and I expect you can
> devise (ARM-specific) optimizations to avoid repetition there too.

Yes, the execution time of tlb_start_vma() does not depend on the range
size, so that is a lesser problem.

Thanks,

A.

---

From: Aaro Koskinen <[email protected]>
Subject: [RFC PATCH] [ARM] Flush only the needed range when unmapping VMA

Signed-off-by: Aaro Koskinen <[email protected]>
---
arch/arm/include/asm/tlb.h | 25 ++++++++++++++++++++++---
1 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 857f1df..2729fb9 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -36,6 +36,8 @@
struct mmu_gather {
struct mm_struct *mm;
unsigned int fullmm;
+ unsigned long range_start;
+ unsigned long range_end;
};

DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -47,6 +49,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)

tlb->mm = mm;
tlb->fullmm = full_mm_flush;
+ tlb->range_start = TASK_SIZE;
+ tlb->range_end = 0;

return tlb;
}
@@ -63,7 +67,19 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
put_cpu_var(mmu_gathers);
}

-#define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0)
+/*
+ * Memorize the range for the TLB flush.
+ */
+static inline void
+tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
+{
+ if (!tlb->fullmm) {
+ if (addr < tlb->range_start)
+ tlb->range_start = addr;
+ if (addr + PAGE_SIZE > tlb->range_end)
+ tlb->range_end = addr + PAGE_SIZE;
+ }
+}

/*
* In the case of tlb vma handling, we can optimise these away in the
@@ -80,8 +96,11 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
static inline void
tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
- if (!tlb->fullmm)
- flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+ if (!tlb->fullmm && tlb->range_end > 0) {
+ flush_tlb_range(vma, tlb->range_start, tlb->range_end);
+ tlb->range_start = TASK_SIZE;
+ tlb->range_end = 0;
+ }
}

#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page)
--
1.5.4.3

2009-03-09 18:21:35

by Hugh Dickins

Subject: Re: [RFC PATCH 1/2] mm: tlb: Add range to tlb_start_vma() and tlb_end_vma()

On Mon, 9 Mar 2009, Aaro Koskinen wrote:
> Hugh Dickins wrote:
>
> > I don't see that you need to change the interface and other arches
> > at all. What prevents ARM from noting the first and last addresses
> > freed in its struct mmu_gather when tlb_remove_tlb_entry() is called
> > (see arch/um/include/asm/tlb.h for an example of that), then using
> > that in its tlb_end_vma() TLB flushing?
>
> This would probably work, thanks for pointing it out. I should have taken a
> better look at the full API, not just at what was implemented in ARM.
>
> So, there's a new ARM-only patch draft below based on this idea, also
> adding linux-arm-kernel back to the recipients.

This one is much better, thank you. I would think it more natural
to do the initialization of range_start and range_end in your
tlb_start_vma() - to complement tlb_end_vma() where you deal with
the final result - rather than in the two places where you currently
have it; but that's somewhat a matter of taste, and your patch should
work as is.
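
(For concreteness, the variant suggested here would look roughly like
this - a sketch only, not code from the posted patch:)

	static inline void
	tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
	{
		if (!tlb->fullmm) {
			flush_cache_range(vma, vma->vm_start, vma->vm_end);
			tlb->range_start = TASK_SIZE;	/* reset per VMA */
			tlb->range_end = 0;
		}
	}

	static inline void
	tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
	{
		if (!tlb->fullmm && tlb->range_end > 0)
			flush_tlb_range(vma, tlb->range_start, tlb->range_end);
	}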

Hugh

>
> ---
>
> From: Aaro Koskinen <[email protected]>
> Subject: [RFC PATCH] [ARM] Flush only the needed range when unmapping VMA
>
> Signed-off-by: Aaro Koskinen <[email protected]>
> ---
> arch/arm/include/asm/tlb.h | 25 ++++++++++++++++++++++---
> 1 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
> index 857f1df..2729fb9 100644
> --- a/arch/arm/include/asm/tlb.h
> +++ b/arch/arm/include/asm/tlb.h
> @@ -36,6 +36,8 @@
> struct mmu_gather {
> struct mm_struct *mm;
> unsigned int fullmm;
> + unsigned long range_start;
> + unsigned long range_end;
> };
>
> DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
> @@ -47,6 +49,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
>
> tlb->mm = mm;
> tlb->fullmm = full_mm_flush;
> + tlb->range_start = TASK_SIZE;
> + tlb->range_end = 0;
>
> return tlb;
> }
> @@ -63,7 +67,19 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
> put_cpu_var(mmu_gathers);
> }
>
> -#define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0)
> +/*
> + * Memorize the range for the TLB flush.
> + */
> +static inline void
> +tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
> +{
> + if (!tlb->fullmm) {
> + if (addr < tlb->range_start)
> + tlb->range_start = addr;
> + if (addr + PAGE_SIZE > tlb->range_end)
> + tlb->range_end = addr + PAGE_SIZE;
> + }
> +}
>
> /*
> * In the case of tlb vma handling, we can optimise these away in the
> @@ -80,8 +96,11 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
> static inline void
> tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
> {
> - if (!tlb->fullmm)
> - flush_tlb_range(vma, vma->vm_start, vma->vm_end);
> + if (!tlb->fullmm && tlb->range_end > 0) {
> + flush_tlb_range(vma, tlb->range_start, tlb->range_end);
> + tlb->range_start = TASK_SIZE;
> + tlb->range_end = 0;
> + }
> }
>
> #define tlb_remove_page(tlb,page) free_page_and_swap_cache(page)
> --
> 1.5.4.3