This series fixes two issues in the x86 ioremap free page handling
for pud/pmd mappings.
Patch 01 fixes BUG_ON on x86-PAE reported by Joerg. It disables
the free page handling on x86-PAE.
Patches 02-03 fix a possible issue with speculation which can cause
stale page-directory cache entries.
- Patch 02 is from Chintan's v9 01/04 patch [1], which adds a new arg
'addr', with my merge change to patch 01.
- Patch 03 adds a TLB purge (INVLPG) to purge page-structure caches
that may be cached by speculation. See the patch descriptions for
more detail.
The patches are based on the tip tree.
[1] https://patchwork.kernel.org/patch/10371015/
v4:
- Re-wrote patch 2/3 description. (v3-UPDATE)
- Added NOTE to pud_free_pmd_page().
v3:
- Fixed a build error in v2.
v2:
- Reordered patch-set, so that patch 01 can be applied independently.
- Added a NULL pointer check for the page alloc in patch 03.
---
Toshi Kani (2):
1/3 x86/mm: disable ioremap free page handling on x86-PAE
3/3 x86/mm: add TLB purge to free pmd/pte page interfaces
Chintan Pandya (1):
2/3 ioremap: Update pgtable free interfaces with addr
---
arch/arm64/mm/mmu.c | 4 +--
arch/x86/mm/pgtable.c | 61 +++++++++++++++++++++++++++++++++++++------
include/asm-generic/pgtable.h | 8 +++---
lib/ioremap.c | 4 +--
4 files changed, 61 insertions(+), 16 deletions(-)
From: Chintan Pandya <[email protected]>
The following kernel panic was observed on ARM64 platform due to a stale
TLB entry.
1. ioremap with 4K size, a valid pte page table is set.
2. iounmap it, its pte entry is set to 0.
3. ioremap the same address with 2M size, update its pmd entry with
a new value.
4. CPU may hit an exception because the old pmd entry is still in TLB,
which leads to a kernel panic.
Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
table") has addressed this panic by falling back to pte mappings in the above
case on ARM64.
To support pmd mappings in all cases, TLB purge needs to be performed
in this case on ARM64.
Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
so that TLB purge can be added later in separate patches.
[[email protected]: merge changes, rewrite patch description]
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Chintan Pandya <[email protected]>
Signed-off-by: Toshi Kani <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: <[email protected]>
---
arch/arm64/mm/mmu.c | 4 ++--
arch/x86/mm/pgtable.c | 12 +++++++-----
include/asm-generic/pgtable.h | 8 ++++----
lib/ioremap.c | 4 ++--
4 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 493ff75670ff..8ae5d7ae4af3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp)
return 1;
}
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return pud_none(*pud);
}
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return pmd_none(*pmd);
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1aeb7a5dbce5..fbd14e506758 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -723,11 +723,12 @@ int pmd_clear_huge(pmd_t *pmd)
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
* @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
*
* Context: The pud range has been unmaped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
pmd_t *pmd;
int i;
@@ -738,7 +739,7 @@ int pud_free_pmd_page(pud_t *pud)
pmd = (pmd_t *)pud_page_vaddr(*pud);
for (i = 0; i < PTRS_PER_PMD; i++)
- if (!pmd_free_pte_page(&pmd[i]))
+ if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
return 0;
pud_clear(pud);
@@ -750,11 +751,12 @@ int pud_free_pmd_page(pud_t *pud)
/**
* pmd_free_pte_page - Clear pmd entry and free pte page.
* @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
*
* Context: The pmd range has been unmaped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
@@ -770,7 +772,7 @@ int pmd_free_pte_page(pmd_t *pmd)
#else /* !CONFIG_X86_64 */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return pud_none(*pud);
}
@@ -779,7 +781,7 @@ int pud_free_pmd_page(pud_t *pud)
* Disable free page handling on x86-PAE. This assures that ioremap()
* does not update sync'd pmd entries. See vmalloc_sync_one().
*/
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return pmd_none(*pmd);
}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639afaa39..b081794ba135 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1019,8 +1019,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
@@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
{
return 0;
}
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return 0;
}
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return 0;
}
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bbaa3200..517f5853ffed 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
if (ioremap_pmd_enabled() &&
((next - addr) == PMD_SIZE) &&
IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
- pmd_free_pte_page(pmd)) {
+ pmd_free_pte_page(pmd, addr)) {
if (pmd_set_huge(pmd, phys_addr + addr, prot))
continue;
}
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
if (ioremap_pud_enabled() &&
((next - addr) == PUD_SIZE) &&
IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
- pud_free_pmd_page(pud)) {
+ pud_free_pmd_page(pud, addr)) {
if (pud_set_huge(pud, phys_addr + addr, prot))
continue;
}
ioremap() supports pmd mappings on x86-PAE. However, kernel's pmd
tables are not shared among processes on x86-PAE. Therefore, any
update to sync'd pmd entries needs re-syncing. Freeing a pte page
also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one().
Disable free page handling on x86-PAE. pud_free_pmd_page() and
pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present.
This assures that ioremap() does not update sync'd pmd entries at the
cost of falling back to pte mappings.
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Reported-by: Joerg Roedel <[email protected]>
Signed-off-by: Toshi Kani <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: <[email protected]>
---
arch/x86/mm/pgtable.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 47b5951e592b..1aeb7a5dbce5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -719,6 +719,7 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
+#ifdef CONFIG_X86_64
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
* @pud: Pointer to a PUD.
@@ -766,4 +767,22 @@ int pmd_free_pte_page(pmd_t *pmd)
return 1;
}
+
+#else /* !CONFIG_X86_64 */
+
+int pud_free_pmd_page(pud_t *pud)
+{
+ return pud_none(*pud);
+}
+
+/*
+ * Disable free page handling on x86-PAE. This assures that ioremap()
+ * does not update sync'd pmd entries. See vmalloc_sync_one().
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+ return pmd_none(*pmd);
+}
+
+#endif /* CONFIG_X86_64 */
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates
a pud / pmd map. The following preconditions are met at their entry.
- All pte entries for a target pud/pmd address range have been cleared.
- System-wide TLB purges have been performed for a target pud/pmd address
range.
The preconditions assure that there is no stale TLB entry for the range.
Speculation may not cache TLB entries since it requires all levels of page
entries, including ptes, to have P & A-bits set for an associated address.
However, speculation may cache pud/pmd entries (paging-structure caches)
when they have P-bit set.
Add a system-wide TLB purge (INVLPG) to a single page after clearing
pud/pmd entry's P-bit.
SDM 4.10.4.1, Operations that Invalidate TLBs and Paging-Structure Caches,
states that:
INVLPG invalidates all paging-structure caches associated with the
current PCID regardless of the linear addresses to which they correspond.
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Toshi Kani <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: <[email protected]>
---
arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fbd14e506758..e3deefb891da 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -725,24 +725,44 @@ int pmd_clear_huge(pmd_t *pmd)
* @pud: Pointer to a PUD.
* @addr: Virtual address associated with pud.
*
- * Context: The pud range has been unmaped and TLB purged.
+ * Context: The pud range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
+ *
+ * NOTE: Callers must allow a single page allocation.
*/
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
- pmd_t *pmd;
+ pmd_t *pmd, *pmd_sv;
+ pte_t *pte;
int i;
if (pud_none(*pud))
return 1;
pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
+ if (!pmd_sv)
+ return 0;
- for (i = 0; i < PTRS_PER_PMD; i++)
- if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
- return 0;
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd_sv[i] = pmd[i];
+ if (!pmd_none(pmd[i]))
+ pmd_clear(&pmd[i]);
+ }
pud_clear(pud);
+
+ /* INVLPG to clear all paging-structure caches */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(pmd_sv[i])) {
+ pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
+ free_page((unsigned long)pte);
+ }
+ }
+
+ free_page((unsigned long)pmd_sv);
free_page((unsigned long)pmd);
return 1;
@@ -753,7 +773,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
* @pmd: Pointer to a PMD.
* @addr: Virtual address associated with pmd.
*
- * Context: The pmd range has been unmaped and TLB purged.
+ * Context: The pmd range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
@@ -765,6 +785,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
pte = (pte_t *)pmd_page_vaddr(*pmd);
pmd_clear(pmd);
+
+ /* INVLPG to clear all paging-structure caches */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
free_page((unsigned long)pte);
return 1;
Hi Toshi,
On Wed, Jun 27, 2018 at 08:13:47AM -0600, Toshi Kani wrote:
> From: Chintan Pandya <[email protected]>
>
> The following kernel panic was observed on ARM64 platform due to a stale
> TLB entry.
>
> 1. ioremap with 4K size, a valid pte page table is set.
> 2. iounmap it, its pte entry is set to 0.
> 3. ioremap the same address with 2M size, update its pmd entry with
> a new value.
> 4. CPU may hit an exception because the old pmd entry is still in TLB,
> which leads to a kernel panic.
>
> Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
> table") has addressed this panic by falling to pte mappings in the above
> case on ARM64.
>
> To support pmd mappings in all cases, TLB purge needs to be performed
> in this case on ARM64.
>
> Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
> so that TLB purge can be added later in seprate patches.
So I acked v13 of Chintan's series posted here:
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-June/582953.html
any chance this lot could all be merged together, please?
Will
On Wed, 2018-06-27 at 16:56 +0100, Will Deacon wrote:
> Hi Toshi,
>
> On Wed, Jun 27, 2018 at 08:13:47AM -0600, Toshi Kani wrote:
> > From: Chintan Pandya <[email protected]>
> >
> > The following kernel panic was observed on ARM64 platform due to a stale
> > TLB entry.
> >
> > 1. ioremap with 4K size, a valid pte page table is set.
> > 2. iounmap it, its pte entry is set to 0.
> > 3. ioremap the same address with 2M size, update its pmd entry with
> > a new value.
> > 4. CPU may hit an exception because the old pmd entry is still in TLB,
> > which leads to a kernel panic.
> >
> > Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
> > table") has addressed this panic by falling to pte mappings in the above
> > case on ARM64.
> >
> > To support pmd mappings in all cases, TLB purge needs to be performed
> > in this case on ARM64.
> >
> > Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
> > so that TLB purge can be added later in seprate patches.
>
> So I acked v13 of Chintan's series posted here:
>
> http://lists.infradead.org/pipermail/linux-arm-kernel/2018-June/582953.html
>
> any chance this lot could all be merged together, please?
Hi Will,
Chintan's patch 2/3 and 3/3 apply cleanly on top of my series. Can you
please coordinate with Thomas on the logistics?
Thanks,
-Toshi
Hi Toshi, Thomas,
On Wed, Jun 27, 2018 at 04:13:22PM +0000, Kani, Toshi wrote:
> On Wed, 2018-06-27 at 16:56 +0100, Will Deacon wrote:
> > On Wed, Jun 27, 2018 at 08:13:47AM -0600, Toshi Kani wrote:
> > > From: Chintan Pandya <[email protected]>
> > >
> > > The following kernel panic was observed on ARM64 platform due to a stale
> > > TLB entry.
> > >
> > > 1. ioremap with 4K size, a valid pte page table is set.
> > > 2. iounmap it, its pte entry is set to 0.
> > > 3. ioremap the same address with 2M size, update its pmd entry with
> > > a new value.
> > > 4. CPU may hit an exception because the old pmd entry is still in TLB,
> > > which leads to a kernel panic.
> > >
> > > Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
> > > table") has addressed this panic by falling to pte mappings in the above
> > > case on ARM64.
> > >
> > > To support pmd mappings in all cases, TLB purge needs to be performed
> > > in this case on ARM64.
> > >
> > > Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
> > > so that TLB purge can be added later in seprate patches.
> >
> > So I acked v13 of Chintan's series posted here:
> >
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2018-June/582953.html
> >
> > any chance this lot could all be merged together, please?
>
> Chintan's patch 2/3 and 3/3 apply cleanly on top of my series. Can you
> please coordinate with Thomas on the logistics?
Sure. I guess having this series on a common branch that I can pull into
arm64 and apply Chintan's other patches on top would work.
How does that sound?
Will
On Fri, 2018-06-29 at 13:23 +0100, Will Deacon wrote:
> Hi Toshi, Thomas,
>
> On Wed, Jun 27, 2018 at 04:13:22PM +0000, Kani, Toshi wrote:
> > On Wed, 2018-06-27 at 16:56 +0100, Will Deacon wrote:
> > > On Wed, Jun 27, 2018 at 08:13:47AM -0600, Toshi Kani wrote:
> > > > From: Chintan Pandya <[email protected]>
> > > >
> > > > The following kernel panic was observed on ARM64 platform due to a stale
> > > > TLB entry.
> > > >
> > > > 1. ioremap with 4K size, a valid pte page table is set.
> > > > 2. iounmap it, its pte entry is set to 0.
> > > > 3. ioremap the same address with 2M size, update its pmd entry with
> > > > a new value.
> > > > 4. CPU may hit an exception because the old pmd entry is still in TLB,
> > > > which leads to a kernel panic.
> > > >
> > > > Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
> > > > table") has addressed this panic by falling to pte mappings in the above
> > > > case on ARM64.
> > > >
> > > > To support pmd mappings in all cases, TLB purge needs to be performed
> > > > in this case on ARM64.
> > > >
> > > > Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
> > > > so that TLB purge can be added later in seprate patches.
> > >
> > > So I acked v13 of Chintan's series posted here:
> > >
> > > http://lists.infradead.org/pipermail/linux-arm-kernel/2018-June/582953.html
> > >
> > > any chance this lot could all be merged together, please?
> >
> > Chintan's patch 2/3 and 3/3 apply cleanly on top of my series. Can you
> > please coordinate with Thomas on the logistics?
>
> Sure. I guess having this series on a common branch that I can pull into
> arm64 and apply Chintan's other patches on top would work.
>
> How does that sound?
Should this go thru -mm tree then?
Andrew, Thomas, what do you think?
Thanks,
-Toshi
On Fri, 29 Jun 2018, Kani, Toshi wrote:
> On Fri, 2018-06-29 at 13:23 +0100, Will Deacon wrote:
> > Hi Toshi, Thomas,
> >
> > On Wed, Jun 27, 2018 at 04:13:22PM +0000, Kani, Toshi wrote:
> > > On Wed, 2018-06-27 at 16:56 +0100, Will Deacon wrote:
> > > > On Wed, Jun 27, 2018 at 08:13:47AM -0600, Toshi Kani wrote:
> > > > > From: Chintan Pandya <[email protected]>
> > > > >
> > > > > The following kernel panic was observed on ARM64 platform due to a stale
> > > > > TLB entry.
> > > > >
> > > > > 1. ioremap with 4K size, a valid pte page table is set.
> > > > > 2. iounmap it, its pte entry is set to 0.
> > > > > 3. ioremap the same address with 2M size, update its pmd entry with
> > > > > a new value.
> > > > > 4. CPU may hit an exception because the old pmd entry is still in TLB,
> > > > > which leads to a kernel panic.
> > > > >
> > > > > Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
> > > > > table") has addressed this panic by falling to pte mappings in the above
> > > > > case on ARM64.
> > > > >
> > > > > To support pmd mappings in all cases, TLB purge needs to be performed
> > > > > in this case on ARM64.
> > > > >
> > > > > Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
> > > > > so that TLB purge can be added later in seprate patches.
> > > >
> > > > So I acked v13 of Chintan's series posted here:
> > > >
> > > > http://lists.infradead.org/pipermail/linux-arm-kernel/2018-June/582953.html
> > > >
> > > > any chance this lot could all be merged together, please?
> > >
> > > Chintan's patch 2/3 and 3/3 apply cleanly on top of my series. Can you
> > > please coordinate with Thomas on the logistics?
> >
> > Sure. I guess having this series on a common branch that I can pull into
> > arm64 and apply Chintan's other patches on top would work.
> >
> > How does that sound?
>
> Should this go thru -mm tree then?
>
> Andrew, Thomas, what do you think?
I just pick it up and provide Will a branch to pull that lot from.
Thanks,
tglx
On Tue, Jul 03, 2018 at 11:02:15PM +0200, Thomas Gleixner wrote:
> On Fri, 29 Jun 2018, Kani, Toshi wrote:
> > On Fri, 2018-06-29 at 13:23 +0100, Will Deacon wrote:
> > > On Wed, Jun 27, 2018 at 04:13:22PM +0000, Kani, Toshi wrote:
> > > > On Wed, 2018-06-27 at 16:56 +0100, Will Deacon wrote:
> > > > > On Wed, Jun 27, 2018 at 08:13:47AM -0600, Toshi Kani wrote:
> > > > > > From: Chintan Pandya <[email protected]>
> > > > > >
> > > > > > The following kernel panic was observed on ARM64 platform due to a stale
> > > > > > TLB entry.
> > > > > >
> > > > > > 1. ioremap with 4K size, a valid pte page table is set.
> > > > > > 2. iounmap it, its pte entry is set to 0.
> > > > > > 3. ioremap the same address with 2M size, update its pmd entry with
> > > > > > a new value.
> > > > > > 4. CPU may hit an exception because the old pmd entry is still in TLB,
> > > > > > which leads to a kernel panic.
> > > > > >
> > > > > > Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
> > > > > > table") has addressed this panic by falling to pte mappings in the above
> > > > > > case on ARM64.
> > > > > >
> > > > > > To support pmd mappings in all cases, TLB purge needs to be performed
> > > > > > in this case on ARM64.
> > > > > >
> > > > > > Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
> > > > > > so that TLB purge can be added later in seprate patches.
> > > > >
> > > > > So I acked v13 of Chintan's series posted here:
> > > > >
> > > > > http://lists.infradead.org/pipermail/linux-arm-kernel/2018-June/582953.html
> > > > >
> > > > > any chance this lot could all be merged together, please?
> > > >
> > > > Chintan's patch 2/3 and 3/3 apply cleanly on top of my series. Can you
> > > > please coordinate with Thomas on the logistics?
> > >
> > > Sure. I guess having this series on a common branch that I can pull into
> > > arm64 and apply Chintan's other patches on top would work.
> > >
> > > How does that sound?
> >
> > Should this go thru -mm tree then?
> >
> > Andrew, Thomas, what do you think?
>
> I just pick it up and provide Will a branch to pull that lot from.
Thanks, Thomas. Please let me know once you've pushed something out.
Will
Commit-ID: f967db0b9ed44ec3057a28f3b28efc51df51b835
Gitweb: https://git.kernel.org/tip/f967db0b9ed44ec3057a28f3b28efc51df51b835
Author: Toshi Kani <[email protected]>
AuthorDate: Wed, 27 Jun 2018 08:13:46 -0600
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 4 Jul 2018 21:37:08 +0200
x86/mm: Disable ioremap free page handling on x86-PAE
ioremap() supports pmd mappings on x86-PAE. However, kernel's pmd
tables are not shared among processes on x86-PAE. Therefore, any
update to sync'd pmd entries needs re-syncing. Freeing a pte page
also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one().
Disable free page handling on x86-PAE. pud_free_pmd_page() and
pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present.
This assures that ioremap() does not update sync'd pmd entries at the
cost of falling back to pte mappings.
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Reported-by: Joerg Roedel <[email protected]>
Signed-off-by: Toshi Kani <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/mm/pgtable.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 47b5951e592b..1aeb7a5dbce5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -719,6 +719,7 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
+#ifdef CONFIG_X86_64
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
* @pud: Pointer to a PUD.
@@ -766,4 +767,22 @@ int pmd_free_pte_page(pmd_t *pmd)
return 1;
}
+
+#else /* !CONFIG_X86_64 */
+
+int pud_free_pmd_page(pud_t *pud)
+{
+ return pud_none(*pud);
+}
+
+/*
+ * Disable free page handling on x86-PAE. This assures that ioremap()
+ * does not update sync'd pmd entries. See vmalloc_sync_one().
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+ return pmd_none(*pmd);
+}
+
+#endif /* CONFIG_X86_64 */
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
On Wed, 4 Jul 2018, Will Deacon wrote:
> On Tue, Jul 03, 2018 at 11:02:15PM +0200, Thomas Gleixner wrote:
>
> > I just pick it up and provide Will a branch to pull that lot from.
>
> Thanks, Thomas. Please let me know once you've pushed something out.
Just pushed it out into tip x86/mm branch. It's based on -rc3 and you can
consume it up to
5e0fb5df2ee8 ("x86/mm: Add TLB purge to free pmd/pte page interfaces")
Please wait until tomorrow morning so the 0day robot can chew on it. If
nothing breaks, then it should be good to pull from.
Thanks,
tglx
Commit-ID: 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc
Gitweb: https://git.kernel.org/tip/785a19f9d1dd8a4ab2d0633be4656653bd3de1fc
Author: Chintan Pandya <[email protected]>
AuthorDate: Wed, 27 Jun 2018 08:13:47 -0600
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 4 Jul 2018 21:37:08 +0200
ioremap: Update pgtable free interfaces with addr
The following kernel panic was observed on ARM64 platform due to a stale
TLB entry.
1. ioremap with 4K size, a valid pte page table is set.
2. iounmap it, its pte entry is set to 0.
3. ioremap the same address with 2M size, update its pmd entry with
a new value.
4. CPU may hit an exception because the old pmd entry is still in TLB,
which leads to a kernel panic.
Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
table") has addressed this panic by falling back to pte mappings in the above
case on ARM64.
To support pmd mappings in all cases, TLB purge needs to be performed
in this case on ARM64.
Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
so that TLB purge can be added later in separate patches.
[[email protected]: merge changes, rewrite patch description]
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Chintan Pandya <[email protected]>
Signed-off-by: Toshi Kani <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Will Deacon <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: [email protected]
Cc: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/arm64/mm/mmu.c | 4 ++--
arch/x86/mm/pgtable.c | 12 +++++++-----
include/asm-generic/pgtable.h | 8 ++++----
lib/ioremap.c | 4 ++--
4 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 493ff75670ff..8ae5d7ae4af3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp)
return 1;
}
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return pud_none(*pud);
}
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return pmd_none(*pmd);
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1aeb7a5dbce5..fbd14e506758 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -723,11 +723,12 @@ int pmd_clear_huge(pmd_t *pmd)
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
* @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
*
* Context: The pud range has been unmaped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
pmd_t *pmd;
int i;
@@ -738,7 +739,7 @@ int pud_free_pmd_page(pud_t *pud)
pmd = (pmd_t *)pud_page_vaddr(*pud);
for (i = 0; i < PTRS_PER_PMD; i++)
- if (!pmd_free_pte_page(&pmd[i]))
+ if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
return 0;
pud_clear(pud);
@@ -750,11 +751,12 @@ int pud_free_pmd_page(pud_t *pud)
/**
* pmd_free_pte_page - Clear pmd entry and free pte page.
* @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
*
* Context: The pmd range has been unmaped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
@@ -770,7 +772,7 @@ int pmd_free_pte_page(pmd_t *pmd)
#else /* !CONFIG_X86_64 */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return pud_none(*pud);
}
@@ -779,7 +781,7 @@ int pud_free_pmd_page(pud_t *pud)
* Disable free page handling on x86-PAE. This assures that ioremap()
* does not update sync'd pmd entries. See vmalloc_sync_one().
*/
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return pmd_none(*pmd);
}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639afaa39..b081794ba135 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1019,8 +1019,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
@@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
{
return 0;
}
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return 0;
}
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return 0;
}
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bbaa3200..517f5853ffed 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
if (ioremap_pmd_enabled() &&
((next - addr) == PMD_SIZE) &&
IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
- pmd_free_pte_page(pmd)) {
+ pmd_free_pte_page(pmd, addr)) {
if (pmd_set_huge(pmd, phys_addr + addr, prot))
continue;
}
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
if (ioremap_pud_enabled() &&
((next - addr) == PUD_SIZE) &&
IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
- pud_free_pmd_page(pud)) {
+ pud_free_pmd_page(pud, addr)) {
if (pud_set_huge(pud, phys_addr + addr, prot))
continue;
}
Commit-ID: 5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e
Gitweb: https://git.kernel.org/tip/5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e
Author: Toshi Kani <[email protected]>
AuthorDate: Wed, 27 Jun 2018 08:13:48 -0600
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 4 Jul 2018 21:37:09 +0200
x86/mm: Add TLB purge to free pmd/pte page interfaces
ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates
a pud / pmd map. The following preconditions are met at their entry.
- All pte entries for a target pud/pmd address range have been cleared.
- System-wide TLB purges have been performed for a target pud/pmd address
range.
The preconditions assure that there is no stale TLB entry for the range.
Speculation may not cache TLB entries since it requires all levels of page
entries, including ptes, to have P & A-bits set for an associated address.
However, speculation may cache pud/pmd entries (paging-structure caches)
when they have P-bit set.
Add a system-wide TLB purge (INVLPG) to a single page after clearing
pud/pmd entry's P-bit.
SDM 4.10.4.1, Operations that Invalidate TLBs and Paging-Structure Caches,
states that:
INVLPG invalidates all paging-structure caches associated with the
current PCID regardless of the linear addresses to which they correspond.
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Toshi Kani <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Joerg Roedel <[email protected]>
Cc: [email protected]
Cc: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fbd14e506758..e3deefb891da 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -725,24 +725,44 @@ int pmd_clear_huge(pmd_t *pmd)
* @pud: Pointer to a PUD.
* @addr: Virtual address associated with pud.
*
- * Context: The pud range has been unmaped and TLB purged.
+ * Context: The pud range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
+ *
+ * NOTE: Callers must allow a single page allocation.
*/
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
- pmd_t *pmd;
+ pmd_t *pmd, *pmd_sv;
+ pte_t *pte;
int i;
if (pud_none(*pud))
return 1;
pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
+ if (!pmd_sv)
+ return 0;
- for (i = 0; i < PTRS_PER_PMD; i++)
- if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
- return 0;
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd_sv[i] = pmd[i];
+ if (!pmd_none(pmd[i]))
+ pmd_clear(&pmd[i]);
+ }
pud_clear(pud);
+
+ /* INVLPG to clear all paging-structure caches */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(pmd_sv[i])) {
+ pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
+ free_page((unsigned long)pte);
+ }
+ }
+
+ free_page((unsigned long)pmd_sv);
free_page((unsigned long)pmd);
return 1;
@@ -753,7 +773,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
* @pmd: Pointer to a PMD.
* @addr: Virtual address associated with pmd.
*
- * Context: The pmd range has been unmaped and TLB purged.
+ * Context: The pmd range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
@@ -765,6 +785,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
pte = (pte_t *)pmd_page_vaddr(*pmd);
pmd_clear(pmd);
+
+ /* INVLPG to clear all paging-structure caches */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
free_page((unsigned long)pte);
return 1;
On Wed, Jul 04, 2018 at 09:39:50PM +0200, Thomas Gleixner wrote:
> On Wed, 4 Jul 2018, Will Deacon wrote:
> > On Tue, Jul 03, 2018 at 11:02:15PM +0200, Thomas Gleixner wrote:
> >
> > > I just pick it up and provide Will a branch to pull that lot from.
> >
> > Thanks, Thomas. Please let me know once you've pushed something out.
>
> Just pushed it out into tip x86/mm branch. It's based on -rc3 and you can
> consume it up to
>
> 5e0fb5df2ee8 ("x86/mm: Add TLB purge to free pmd/pte page interfaces")
>
> Please wait until tomorrow morning so the 0day robot can chew on it. If
> nothing breaks, then it should be good to pull from.
Great, thanks Thomas. It looks like the bot's happy with that, so I've
pulled it locally and I'll push it out as part of the arm64 for-next/core
branch tomorrow, after some basic tests.
Will