2020-10-23 09:16:24

by Miles Chen

[permalink] [raw]
Subject: [PATCH v2 3/4] arm: mm: introduce L_PTE_SPECIAL

From: Minchan Kim <[email protected]>

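This patch introduces L_PTE_SPECIAL for the 2-level page table format
and makes the pte_special()/pte_mkspecial() helpers common to both the
2-level and 3-level formats, in order to support get_user_pages_fast.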

Cc: Russell King <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Steve Capper <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Signed-off-by: Minchan Kim <[email protected]>
Signed-off-by: Miles Chen <[email protected]>
---
arch/arm/Kconfig | 4 ++--
arch/arm/include/asm/pgtable-2level.h | 1 +
arch/arm/include/asm/pgtable-3level.h | 6 ------
arch/arm/include/asm/pgtable.h | 13 +++++++++++++
4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c18fa9d382b7..1f75864b7c7a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -13,7 +13,7 @@ config ARM
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
+ select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPU_V6K)
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_MEMORY
@@ -82,7 +82,7 @@ config ARM
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
- select HAVE_FAST_GUP if ARM_LPAE
+ select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPU_V6K)
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index cdcd55cca37d..385e7a32394e 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -117,6 +117,7 @@
#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
#define L_PTE_PRESENT (_AT(pteval_t, 1) << 0)
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 1)
+#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 5)
#define L_PTE_DIRTY (_AT(pteval_t, 1) << 6)
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7)
#define L_PTE_USER (_AT(pteval_t, 1) << 8)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index fbb6693c3352..46fcc6725d3e 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -175,12 +175,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud)

#define pmd_present(pmd) (pmd_isset((pmd), L_PMD_SECT_VALID))
#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF))
-#define pte_special(pte) (pte_isset((pte), L_PTE_SPECIAL))
-static inline pte_t pte_mkspecial(pte_t pte)
-{
- pte_val(pte) |= L_PTE_SPECIAL;
- return pte;
-}

#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index c02f24400369..4092154ca779 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -195,6 +195,11 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY))
#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG))
#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN))
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+#define pte_special(pte) (pte_isset((pte), L_PTE_SPECIAL))
+#else
+#define pte_special(pte) (0)
+#endif

#define pte_valid_user(pte) \
(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
@@ -274,6 +279,14 @@ static inline pte_t pte_mknexec(pte_t pte)
return set_pte_bit(pte, __pgprot(L_PTE_XN));
}

+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(L_PTE_SPECIAL));
+}
+#else
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+#endif
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
--
2.18.0


2020-10-23 16:52:49

by Russell King (Oracle)

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] arm: mm: introduce L_PTE_SPECIAL

On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> From: Minchan Kim <[email protected]>
>
> This patch introduces L_PTE_SPECIAL and pte functions for supporting
> get_user_pages_fast.
>
> Cc: Russell King <[email protected]>
> Cc: Catalin Marinas <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Steve Capper <[email protected]>
> Cc: Minchan Kim <[email protected]>
> Cc: Suren Baghdasaryan <[email protected]>
> Signed-off-by: Minchan Kim <[email protected]>
> Signed-off-by: Miles Chen <[email protected]>
> ---
> arch/arm/Kconfig | 4 ++--
> arch/arm/include/asm/pgtable-2level.h | 1 +
> arch/arm/include/asm/pgtable-3level.h | 6 ------
> arch/arm/include/asm/pgtable.h | 13 +++++++++++++
> 4 files changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index c18fa9d382b7..1f75864b7c7a 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -13,7 +13,7 @@ config ARM
> select ARCH_HAS_KCOV
> select ARCH_HAS_MEMBARRIER_SYNC_CORE
> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> - select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> + select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> select ARCH_HAS_PHYS_TO_DMA
> select ARCH_HAS_SETUP_DMA_OPS
> select ARCH_HAS_SET_MEMORY
> @@ -82,7 +82,7 @@ config ARM
> select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> select HAVE_EXIT_THREAD
> - select HAVE_FAST_GUP if ARM_LPAE
> + select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> index cdcd55cca37d..385e7a32394e 100644
> --- a/arch/arm/include/asm/pgtable-2level.h
> +++ b/arch/arm/include/asm/pgtable-2level.h
> @@ -117,6 +117,7 @@
> #define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
> #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0)
> #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1)
> +#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 5)

How does this work? Bits 2 through 5 are already in use for the memory
type.

Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
it.

--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

2020-10-28 01:41:26

by Miles Chen

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] arm: mm: introduce L_PTE_SPECIAL

On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > From: Minchan Kim <[email protected]>
> >
> > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > get_user_pages_fast.
> >
> > Cc: Russell King <[email protected]>
> > Cc: Catalin Marinas <[email protected]>
> > Cc: Will Deacon <[email protected]>
> > Cc: Steve Capper <[email protected]>
> > Cc: Minchan Kim <[email protected]>
> > Cc: Suren Baghdasaryan <[email protected]>
> > Signed-off-by: Minchan Kim <[email protected]>
> > Signed-off-by: Miles Chen <[email protected]>
> > ---
> > arch/arm/Kconfig | 4 ++--
> > arch/arm/include/asm/pgtable-2level.h | 1 +
> > arch/arm/include/asm/pgtable-3level.h | 6 ------
> > arch/arm/include/asm/pgtable.h | 13 +++++++++++++
> > 4 files changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > index c18fa9d382b7..1f75864b7c7a 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -13,7 +13,7 @@ config ARM
> > select ARCH_HAS_KCOV
> > select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > - select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> > + select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > select ARCH_HAS_PHYS_TO_DMA
> > select ARCH_HAS_SETUP_DMA_OPS
> > select ARCH_HAS_SET_MEMORY
> > @@ -82,7 +82,7 @@ config ARM
> > select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> > select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> > select HAVE_EXIT_THREAD
> > - select HAVE_FAST_GUP if ARM_LPAE
> > + select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> > select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> > select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> > diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> > index cdcd55cca37d..385e7a32394e 100644
> > --- a/arch/arm/include/asm/pgtable-2level.h
> > +++ b/arch/arm/include/asm/pgtable-2level.h
> > @@ -117,6 +117,7 @@
> > #define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
> > #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0)
> > #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1)
> > +#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 5)
>
> How does this work? Bits 2 through 5 are already in use for the memory
> type.
>
> Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> it.

Thanks for the comment.
The idea is to re-order the memory type table in [1] (patch 2/4 of this
v2 series) so that bit 5 is freed up and can be used for L_PTE_SPECIAL.

[1] https://lore.kernel.org/patchwork/patch/1323893/


Miles

2020-10-28 06:08:38

by Russell King (Oracle)

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] arm: mm: introduce L_PTE_SPECIAL

On Tue, Oct 27, 2020 at 03:45:12PM +0800, Miles Chen wrote:
> On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> > On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > > From: Minchan Kim <[email protected]>
> > >
> > > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > > get_user_pages_fast.
> > >
> > > Cc: Russell King <[email protected]>
> > > Cc: Catalin Marinas <[email protected]>
> > > Cc: Will Deacon <[email protected]>
> > > Cc: Steve Capper <[email protected]>
> > > Cc: Minchan Kim <[email protected]>
> > > Cc: Suren Baghdasaryan <[email protected]>
> > > Signed-off-by: Minchan Kim <[email protected]>
> > > Signed-off-by: Miles Chen <[email protected]>
> > > ---
> > > arch/arm/Kconfig | 4 ++--
> > > arch/arm/include/asm/pgtable-2level.h | 1 +
> > > arch/arm/include/asm/pgtable-3level.h | 6 ------
> > > arch/arm/include/asm/pgtable.h | 13 +++++++++++++
> > > 4 files changed, 16 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > > index c18fa9d382b7..1f75864b7c7a 100644
> > > --- a/arch/arm/Kconfig
> > > +++ b/arch/arm/Kconfig
> > > @@ -13,7 +13,7 @@ config ARM
> > > select ARCH_HAS_KCOV
> > > select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > > select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > - select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> > > + select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > > select ARCH_HAS_PHYS_TO_DMA
> > > select ARCH_HAS_SETUP_DMA_OPS
> > > select ARCH_HAS_SET_MEMORY
> > > @@ -82,7 +82,7 @@ config ARM
> > > select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> > > select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> > > select HAVE_EXIT_THREAD
> > > - select HAVE_FAST_GUP if ARM_LPAE
> > > + select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > > select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> > > select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> > > select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> > > diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> > > index cdcd55cca37d..385e7a32394e 100644
> > > --- a/arch/arm/include/asm/pgtable-2level.h
> > > +++ b/arch/arm/include/asm/pgtable-2level.h
> > > @@ -117,6 +117,7 @@
> > > #define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
> > > #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0)
> > > #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1)
> > > +#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 5)
> >
> > How does this work? Bits 2 through 5 are already in use for the memory
> > type.
> >
> > Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> > L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> > it.
>
> Thanks for the comment.
> The idea is to re-order the memory type table in [1] (patch v2/4) and
> use bit 5 for L_PTE_SPECIAL.

Thanks, I know what you are trying to achieve. I don't think it's
possible without breaking the kernel on some CPUs and configurations.

--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

2020-11-01 12:52:40

by Miles Chen

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] arm: mm: introduce L_PTE_SPECIAL

On Tue, 2020-10-27 at 09:11 +0000, Russell King - ARM Linux admin wrote:
> On Tue, Oct 27, 2020 at 03:45:12PM +0800, Miles Chen wrote:
> > On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> > > On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > > > From: Minchan Kim <[email protected]>
> > > >
> > > > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > > > get_user_pages_fast.
> > > >
> > > > Cc: Russell King <[email protected]>
> > > > Cc: Catalin Marinas <[email protected]>
> > > > Cc: Will Deacon <[email protected]>
> > > > Cc: Steve Capper <[email protected]>
> > > > Cc: Minchan Kim <[email protected]>
> > > > Cc: Suren Baghdasaryan <[email protected]>
> > > > Signed-off-by: Minchan Kim <[email protected]>
> > > > Signed-off-by: Miles Chen <[email protected]>
> > > > ---
> > > > arch/arm/Kconfig | 4 ++--
> > > > arch/arm/include/asm/pgtable-2level.h | 1 +
> > > > arch/arm/include/asm/pgtable-3level.h | 6 ------
> > > > arch/arm/include/asm/pgtable.h | 13 +++++++++++++
> > > > 4 files changed, 16 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > > > index c18fa9d382b7..1f75864b7c7a 100644
> > > > --- a/arch/arm/Kconfig
> > > > +++ b/arch/arm/Kconfig
> > > > @@ -13,7 +13,7 @@ config ARM
> > > > select ARCH_HAS_KCOV
> > > > select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > > > select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > > - select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> > > > + select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > > > select ARCH_HAS_PHYS_TO_DMA
> > > > select ARCH_HAS_SETUP_DMA_OPS
> > > > select ARCH_HAS_SET_MEMORY
> > > > @@ -82,7 +82,7 @@ config ARM
> > > > select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> > > > select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> > > > select HAVE_EXIT_THREAD
> > > > - select HAVE_FAST_GUP if ARM_LPAE
> > > > + select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > > > select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> > > > select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> > > > select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> > > > diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> > > > index cdcd55cca37d..385e7a32394e 100644
> > > > --- a/arch/arm/include/asm/pgtable-2level.h
> > > > +++ b/arch/arm/include/asm/pgtable-2level.h
> > > > @@ -117,6 +117,7 @@
> > > > #define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
> > > > #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0)
> > > > #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1)
> > > > +#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 5)
> > >
> > > How does this work? Bits 2 through 5 are already in use for the memory
> > > type.
> > >
> > > Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> > > L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> > > it.
> >
> > Thanks for the comment.
> > The idea is to re-order the memory type table in [1] (patch v2/4) and
> > use bit 5 for L_PTE_SPECIAL.
>
> Thanks, I know what you are trying to achieve. I don't think it's
> possible without breaking the kernel on some CPUs and configurations.
>
Got it. Thanks for your review.


Miles