This series of patches takes Toshi Kani <[email protected]>'s
patches ("fix memory leak/panic in ioremap huge pages") as base
and re-bring huge_vmap back for arm64.
This series of patches are tested on 4.16 kernel with Cortex-A75
based SoC.
The test used for verifying these patches is a stress test on
ioremap/unmap which tries to re-use same io-address but changes
size of mapping randomly i.e. 4K to 2M to 16K etc. The same test
used to reproduce 3rd level translation fault without these fixes
(and also of course with Revert "arm64: Enforce BBM for huge IO/VMAP
mappings" being part of the tree).
These patches can also go into '-stable' branch (if accepted)
for 4.6 onwards.
From V8->V9:
- Used __TLBI_VADDR macros in new TLB flush API
From V7->V8:
- Properly fixed compilation issue in x86 file
From V6->V7:
- Fixed compilation issue in x86 case
- V6 patches were not properly enumarated
From V5->V6:
- Use __flush_tlb_kernel_pgtable() for both PUD and PMD. Remove
"bool tlb_inv" based variance as it is not need now
- Re-naming for consistency
From V4->V5:
- Add new API __flush_tlb_kernel_pgtable(unsigned long addr)
for kernel addresses
From V3->V4:
- Add header for 'addr' in x86 implementation
- Re-order pmd/pud clear and table free
- Avoid redundant TLB invalidatation in one perticular case
From V2->V3:
- Use the exisiting page table free interface to do arm64
specific things
From V1->V2:
- Rebased my patches on top of "[PATCH v2 1/2] mm/vmalloc:
Add interfaces to free unmapped page table"
- Honored BBM for ARM64
Chintan Pandya (4):
ioremap: Update pgtable free interfaces with addr
arm64: tlbflush: Introduce __flush_tlb_kernel_pgtable
arm64: Implement page table free interfaces
Revert "arm64: Enforce BBM for huge IO/VMAP mappings"
arch/arm64/include/asm/tlbflush.h | 6 ++++++
arch/arm64/mm/mmu.c | 37 +++++++++++++++++++++++++------------
arch/x86/mm/pgtable.c | 8 +++++---
include/asm-generic/pgtable.h | 8 ++++----
lib/ioremap.c | 4 ++--
5 files changed, 42 insertions(+), 21 deletions(-)
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project
This patch ("mm/vmalloc: Add interfaces to free unmapped
page table") adds following 2 interfaces to free the page
table in case we implement huge mapping.
pud_free_pmd_page() and pmd_free_pte_page()
Some architectures (like arm64) needs to do proper TLB
maintanance after updating pagetable entry even in map.
Why ? Read this,
https://patchwork.kernel.org/patch/10134581/
Pass 'addr' in these interfaces so that proper TLB ops
can be performed.
Signed-off-by: Chintan Pandya <[email protected]>
---
arch/arm64/mm/mmu.c | 4 ++--
arch/x86/mm/pgtable.c | 8 +++++---
include/asm-generic/pgtable.h | 8 ++++----
lib/ioremap.c | 4 ++--
4 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2dbb2c9..da98828 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -973,12 +973,12 @@ int pmd_clear_huge(pmd_t *pmdp)
return 1;
}
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return pud_none(*pud);
}
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return pmd_none(*pmd);
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ffc8c13..37e3cba 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -718,11 +718,12 @@ int pmd_clear_huge(pmd_t *pmd)
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
* @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
*
* Context: The pud range has been unmaped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
pmd_t *pmd;
int i;
@@ -733,7 +734,7 @@ int pud_free_pmd_page(pud_t *pud)
pmd = (pmd_t *)pud_page_vaddr(*pud);
for (i = 0; i < PTRS_PER_PMD; i++)
- if (!pmd_free_pte_page(&pmd[i]))
+ if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
return 0;
pud_clear(pud);
@@ -745,11 +746,12 @@ int pud_free_pmd_page(pud_t *pud)
/**
* pmd_free_pte_page - Clear pmd entry and free pte page.
* @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
*
* Context: The pmd range has been unmaped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639a..b081794 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1019,8 +1019,8 @@ static inline int p4d_clear_huge(p4d_t *p4d)
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
@@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
{
return 0;
}
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return 0;
}
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return 0;
}
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bba..517f585 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
if (ioremap_pmd_enabled() &&
((next - addr) == PMD_SIZE) &&
IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
- pmd_free_pte_page(pmd)) {
+ pmd_free_pte_page(pmd, addr)) {
if (pmd_set_huge(pmd, phys_addr + addr, prot))
continue;
}
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
if (ioremap_pud_enabled() &&
((next - addr) == PUD_SIZE) &&
IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
- pud_free_pmd_page(pud)) {
+ pud_free_pmd_page(pud, addr)) {
if (pud_set_huge(pud, phys_addr + addr, prot))
continue;
}
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project
Add an interface to invalidate intermediate page tables
from TLB for kernel.
Signed-off-by: Chintan Pandya <[email protected]>
---
arch/arm64/include/asm/tlbflush.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index dfc61d7..a4a1901 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -218,6 +218,13 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
dsb(ish);
}
+static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+{
+ unsigned long addr = __TLBI_VADDR(kaddr, 0);
+
+ __tlbi(vaae1is, addr);
+ dsb(ish);
+}
#endif
#endif
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project
Implement pud_free_pmd_page() and pmd_free_pte_page().
Implementation requires,
1) Clearing off the current pud/pmd entry
2) Invalidate TLB which could have previously
valid but not stale entry
3) Freeing of the un-used next level page tables
Signed-off-by: Chintan Pandya <[email protected]>
---
arch/arm64/mm/mmu.c | 29 +++++++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index da98828..0f651db 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -45,6 +45,7 @@
#include <asm/memblock.h>
#include <asm/mmu_context.h>
#include <asm/ptdump.h>
+#include <asm/tlbflush.h>
#define NO_BLOCK_MAPPINGS BIT(0)
#define NO_CONT_MAPPINGS BIT(1)
@@ -973,12 +974,32 @@ int pmd_clear_huge(pmd_t *pmdp)
return 1;
}
-int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
{
- return pud_none(*pud);
+ pmd_t *table;
+
+ if (pmd_present(READ_ONCE(*pmdp))) {
+ table = __va(pmd_val(*pmdp));
+ pmd_clear(pmdp);
+ __flush_tlb_kernel_pgtable(addr);
+ free_page((unsigned long) table);
+ }
+ return 1;
}
-int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
{
- return pmd_none(*pmd);
+ pmd_t *table;
+ int i;
+
+ if (pud_present(READ_ONCE(*pudp))) {
+ table = __va(pud_val(*pudp));
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_free_pte_page(&table[i], addr + (i * PMD_SIZE));
+
+ pud_clear(pudp);
+ __flush_tlb_kernel_pgtable(addr);
+ free_page((unsigned long) table);
+ }
+ return 1;
}
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project
This commit 15122ee2c515a ("arm64: Enforce BBM for huge
IO/VMAP mappings") is a temporary work-around until the
issues with CONFIG_HAVE_ARCH_HUGE_VMAP gets fixed.
Revert this change as we have fixes for the issue.
Signed-off-by: Chintan Pandya <[email protected]>
---
arch/arm64/mm/mmu.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0f651db..170009b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -935,10 +935,6 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
- /* ioremap_page_range doesn't honour BBM */
- if (pud_present(READ_ONCE(*pudp)))
- return 0;
-
BUG_ON(phys & ~PUD_MASK);
set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
return 1;
@@ -949,10 +945,6 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
- /* ioremap_page_range doesn't honour BBM */
- if (pmd_present(READ_ONCE(*pmdp)))
- return 0;
-
BUG_ON(phys & ~PMD_MASK);
set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
return 1;
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project
Hi Chintan,
[as a side note: I'm confused on the status of this patch series, as part
of it was reposted separately by Toshi. Please can you work together?]
On Mon, Apr 30, 2018 at 01:11:33PM +0530, Chintan Pandya wrote:
> Implement pud_free_pmd_page() and pmd_free_pte_page().
>
> Implementation requires,
> 1) Clearing off the current pud/pmd entry
> 2) Invalidate TLB which could have previously
> valid but not stale entry
> 3) Freeing of the un-used next level page tables
>
> Signed-off-by: Chintan Pandya <[email protected]>
> ---
> arch/arm64/mm/mmu.c | 29 +++++++++++++++++++++++++----
> 1 file changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index da98828..0f651db 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -45,6 +45,7 @@
> #include <asm/memblock.h>
> #include <asm/mmu_context.h>
> #include <asm/ptdump.h>
> +#include <asm/tlbflush.h>
>
> #define NO_BLOCK_MAPPINGS BIT(0)
> #define NO_CONT_MAPPINGS BIT(1)
> @@ -973,12 +974,32 @@ int pmd_clear_huge(pmd_t *pmdp)
> return 1;
> }
>
> -int pud_free_pmd_page(pud_t *pud, unsigned long addr)
> +int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
> {
> - return pud_none(*pud);
> + pmd_t *table;
> +
> + if (pmd_present(READ_ONCE(*pmdp))) {
Might also be worth checking pmd_table here, just in case. (same for pud)
> + table = __va(pmd_val(*pmdp));
Can you avoid dereferencing *pmdp twice, and instead READ_ONCE into a local
variable, please? (same for pud)
> + pmd_clear(pmdp);
> + __flush_tlb_kernel_pgtable(addr);
> + free_page((unsigned long) table);
Shouldn't this be pte_free_kernel, to pair with pte_alloc_kernel which
was used to allocate the page in the first place? (similarly for pud)
> + }
> + return 1;
> }
>
> -int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
> +int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
> {
> - return pmd_none(*pmd);
> + pmd_t *table;
> + int i;
> +
> + if (pud_present(READ_ONCE(*pudp))) {
> + table = __va(pud_val(*pudp));
> + for (i = 0; i < PTRS_PER_PMD; i++)
> + pmd_free_pte_page(&table[i], addr + (i * PMD_SIZE));
I think it would be cleaner to write this as a do { ... } while, for
consistency with the ioremap and vmalloc code.
Will
On Wed, 2018-05-23 at 15:01 +0100, Will Deacon wrote:
> Hi Chintan,
>
> [as a side note: I'm confused on the status of this patch series, as part
> of it was reposted separately by Toshi. Please can you work together?]
I do not know the status of my patch series, either... That being said,
I made my x86 patches based off from Chintan's 1/4 patch (which changes
both x86 and arm) so that my series won't conflict with his.
Chintan,
If you need to update your series before mine's accepted, please make
sure to use the updated 1/4 below. I've updated the descriptions per
review comment.
https://patchwork.kernel.org/patch/10407065/
Thanks,
-Toshi
On 5/23/2018 8:04 PM, Kani, Toshi wrote:
> On Wed, 2018-05-23 at 15:01 +0100, Will Deacon wrote:
>> Hi Chintan,
>>
>> [as a side note: I'm confused on the status of this patch series, as part
>> of it was reposted separately by Toshi. Please can you work together?]
>
> I do not know the status of my patch series, either... That being said,
> I made my x86 patches based off from Chintan's 1/4 patch (which changes
> both x86 and arm) so that my series won't conflict with his.
>
> Chintan,
Hi Toshi,
> If you need to update your series before mine's accepted, please make
> sure to use the updated 1/4 below. I've updated the descriptions per
> review comment.
> https://patchwork.kernel.org/patch/10407065/
For the sake of completeness, I will re-push my previous 1/4 but will
take your version of change log. I've seen this and your change log
describes the change better.
>
> Thanks,
> -Toshi
>
Chintan
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center,
Inc. is a member of the Code Aurora Forum, a Linux Foundation
Collaborative Project
On 5/23/2018 7:31 PM, Will Deacon wrote:
> Hi Chintan,
Hi Will,
>
> [as a side note: I'm confused on the status of this patch series, as part
> of it was reposted separately by Toshi. Please can you work together?]
I will share all 4 patches once again as v10 and take latest version of
1/4 as updated by Toshi.
>
> On Mon, Apr 30, 2018 at 01:11:33PM +0530, Chintan Pandya wrote:
>> Implement pud_free_pmd_page() and pmd_free_pte_page().
>>
>> Implementation requires,
>> 1) Clearing off the current pud/pmd entry
>> 2) Invalidate TLB which could have previously
>> valid but not stale entry
>> 3) Freeing of the un-used next level page tables
>>
>> Signed-off-by: Chintan Pandya <[email protected]>
>> ---
>> arch/arm64/mm/mmu.c | 29 +++++++++++++++++++++++++----
>> 1 file changed, 25 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index da98828..0f651db 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -45,6 +45,7 @@
>> #include <asm/memblock.h>
>> #include <asm/mmu_context.h>
>> #include <asm/ptdump.h>
>> +#include <asm/tlbflush.h>
>>
>> #define NO_BLOCK_MAPPINGS BIT(0)
>> #define NO_CONT_MAPPINGS BIT(1)
>> @@ -973,12 +974,32 @@ int pmd_clear_huge(pmd_t *pmdp)
>> return 1;
>> }
>>
>> -int pud_free_pmd_page(pud_t *pud, unsigned long addr)
>> +int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
>> {
>> - return pud_none(*pud);
>> + pmd_t *table;
>> +
>> + if (pmd_present(READ_ONCE(*pmdp))) {
>
> Might also be worth checking pmd_table here, just in case. (same for pud)
I had that check in v2 as below.
if (pud_val(*pud) && !pud_huge(*pud))
But removed that in v3 as unmap should change this to NONE if it is
not table. I still don't see the need of it.
>
>> + table = __va(pmd_val(*pmdp));
>
> Can you avoid dereferencing *pmdp twice, and instead READ_ONCE into a local
> variable, please? (same for pud)
Okay.
>
>> + pmd_clear(pmdp);
>> + __flush_tlb_kernel_pgtable(addr);
>> + free_page((unsigned long) table);
>
> Shouldn't this be pte_free_kernel, to pair with pte_alloc_kernel which
> was used to allocate the page in the first place? (similarly for pud)
Okay.
>
>> + }
>> + return 1;
>> }
>>
>> -int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
>> +int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
>> {
>> - return pmd_none(*pmd);
>> + pmd_t *table;
>> + int i;
>> +
>> + if (pud_present(READ_ONCE(*pudp))) {
>> + table = __va(pud_val(*pudp));
>> + for (i = 0; i < PTRS_PER_PMD; i++)
>> + pmd_free_pte_page(&table[i], addr + (i * PMD_SIZE));
>
> I think it would be cleaner to write this as a do { ... } while, for
> consistency with the ioremap and vmalloc code.
Okay.
I'll raise v10 fixing above things. Thanks for the review.
>
> Will
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Chintan
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center,
Inc. is a member of the Code Aurora Forum, a Linux Foundation
Collaborative Project