Attempt VMA lock-based page fault handling first, and fall back to the
existing mmap_lock-based handling if that fails.
A simple run of the ebizzy benchmark on a Lichee Pi 4A shows that
PER_VMA_LOCK can improve the ebizzy result by about 32.68%. In
theory, the more CPUs, the bigger the improvement, but I don't have any
HW platform which has more than 4 CPUs.
This is the riscv variant of "x86/mm: try VMA lock-based page fault
handling first".
Signed-off-by: Jisheng Zhang <[email protected]>
---
Any performance numbers are welcome! Especially the numbers on HW
platforms with 8 or more CPUs.
arch/riscv/Kconfig | 1 +
arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 62e84fee2cfd..b958f67f9a12 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -42,6 +42,7 @@ config RISCV
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
+ select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 8685f85a7474..eccdddf26f4b 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -286,6 +286,36 @@ void handle_page_fault(struct pt_regs *regs)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
+#ifdef CONFIG_PER_VMA_LOCK
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, addr);
+ if (!vma)
+ goto lock_mmap;
+
+ if (unlikely(access_error(cause, vma))) {
+ vma_end_read(vma);
+ goto lock_mmap;
+ }
+
+ fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, addr);
+ return;
+ }
+lock_mmap:
+#endif /* CONFIG_PER_VMA_LOCK */
+
retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
@@ -355,6 +385,9 @@ void handle_page_fault(struct pt_regs *regs)
mmap_read_unlock(mm);
+#ifdef CONFIG_PER_VMA_LOCK
+done:
+#endif
if (unlikely(fault & VM_FAULT_ERROR)) {
tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
--
2.40.1
> Attempt VMA lock-based page fault handling first, and fall back to the
> existing mmap_lock-based handling if that fails.
>
> A simple running the ebizzy benchmark on Lichee Pi 4A shows that
> PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In
Good improvement; I think VMA lock is worth supporting on riscv.
Please give more details about ebizzy. Is it
https://github.com/linux-test-project/ltp/blob/master/utils/benchmark/ebizzy-0.3/ebizzy.c
?
> theory, the more CPUs, the bigger improvement, but I don't have any
> HW platform which has more than 4 CPUs.
>
> This is the riscv variant of "x86/mm: try VMA lock-based page fault
> handling first".
>
How about adding a Link tag here:
Link: https://lwn.net/Articles/906852/
> Signed-off-by: Jisheng Zhang <[email protected]>
> ---
> Any performance numbers are welcome! Especially the numbers on HW
> platforms with 8 or more CPUs.
>
> arch/riscv/Kconfig | 1 +
> arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++
> 2 files changed, 34 insertions(+)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 62e84fee2cfd..b958f67f9a12 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -42,6 +42,7 @@ config RISCV
> select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
> select ARCH_SUPPORTS_HUGETLBFS if MMU
> select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
> + select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
> select ARCH_USE_MEMTEST
> select ARCH_USE_QUEUED_RWLOCKS
> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> index 8685f85a7474..eccdddf26f4b 100644
> --- a/arch/riscv/mm/fault.c
> +++ b/arch/riscv/mm/fault.c
> @@ -286,6 +286,36 @@ void handle_page_fault(struct pt_regs *regs)
> flags |= FAULT_FLAG_WRITE;
> else if (cause == EXC_INST_PAGE_FAULT)
> flags |= FAULT_FLAG_INSTRUCTION;
> +#ifdef CONFIG_PER_VMA_LOCK
> + if (!(flags & FAULT_FLAG_USER))
> + goto lock_mmap;
> +
> + vma = lock_vma_under_rcu(mm, addr);
> + if (!vma)
> + goto lock_mmap;
> +
> + if (unlikely(access_error(cause, vma))) {
> + vma_end_read(vma);
> + goto lock_mmap;
> + }
> +
> + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
> + vma_end_read(vma);
> +
> + if (!(fault & VM_FAULT_RETRY)) {
> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> + goto done;
> + }
> + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> +
> + if (fault_signal_pending(fault, regs)) {
> + if (!user_mode(regs))
> + no_context(regs, addr);
> + return;
> + }
> +lock_mmap:
> +#endif /* CONFIG_PER_VMA_LOCK */
> +
> retry:
> mmap_read_lock(mm);
> vma = find_vma(mm, addr);
> @@ -355,6 +385,9 @@ void handle_page_fault(struct pt_regs *regs)
>
> mmap_read_unlock(mm);
>
> +#ifdef CONFIG_PER_VMA_LOCK
> +done:
> +#endif
It's very close to cd7f176aea5f ("arm64/mm: try VMA lock-based page fault
handling first"), and I didn't find any problem. So:
Reviewed-by: Guo Ren <[email protected]>
FYI Huacai Chen, who may also be interested in this new feature.
> if (unlikely(fault & VM_FAULT_ERROR)) {
> tsk->thread.bad_cause = cause;
> mm_fault_error(regs, addr, fault);
> --
> 2.40.1
On Tue, May 23, 2023 at 10:03 PM <[email protected]> wrote:
>
> > Attempt VMA lock-based page fault handling first, and fall back to the
> > existing mmap_lock-based handling if that fails.
> >
> > A simple running the ebizzy benchmark on Lichee Pi 4A shows that
> > PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In
> Good improvement, I think VMA lock is worth to support in riscv.
>
> Please give more details about ebizzy, Is it
> https://github.com/linux-test-project/ltp/blob/master/utils/benchmark/ebizzy-0.3/ebizzy.c
> ?
>
> > theory, the more CPUs, the bigger improvement, but I don't have any
> > HW platform which has more than 4 CPUs.
> >
> > This is the riscv variant of "x86/mm: try VMA lock-based page fault
> > handling first".
> >
>
> How about add Link tag here:
> Link: https://lwn.net/Articles/906852/
>
> > Signed-off-by: Jisheng Zhang <[email protected]>
> > ---
> > Any performance numbers are welcome! Especially the numbers on HW
> > platforms with 8 or more CPUs.
> >
> > arch/riscv/Kconfig | 1 +
> > arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++
> > 2 files changed, 34 insertions(+)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 62e84fee2cfd..b958f67f9a12 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -42,6 +42,7 @@ config RISCV
> > select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
> > select ARCH_SUPPORTS_HUGETLBFS if MMU
> > select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
> > + select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
> > select ARCH_USE_MEMTEST
> > select ARCH_USE_QUEUED_RWLOCKS
> > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> > index 8685f85a7474..eccdddf26f4b 100644
> > --- a/arch/riscv/mm/fault.c
> > +++ b/arch/riscv/mm/fault.c
> > @@ -286,6 +286,36 @@ void handle_page_fault(struct pt_regs *regs)
> > flags |= FAULT_FLAG_WRITE;
> > else if (cause == EXC_INST_PAGE_FAULT)
> > flags |= FAULT_FLAG_INSTRUCTION;
> > +#ifdef CONFIG_PER_VMA_LOCK
> > + if (!(flags & FAULT_FLAG_USER))
> > + goto lock_mmap;
> > +
> > + vma = lock_vma_under_rcu(mm, addr);
> > + if (!vma)
> > + goto lock_mmap;
> > +
> > + if (unlikely(access_error(cause, vma))) {
> > + vma_end_read(vma);
> > + goto lock_mmap;
> > + }
> > +
> > + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
> > + vma_end_read(vma);
> > +
> > + if (!(fault & VM_FAULT_RETRY)) {
> > + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> > + goto done;
> > + }
> > + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> > +
> > + if (fault_signal_pending(fault, regs)) {
> > + if (!user_mode(regs))
> > + no_context(regs, addr);
> > + return;
> > + }
> > +lock_mmap:
> > +#endif /* CONFIG_PER_VMA_LOCK */
> > +
> > retry:
> > mmap_read_lock(mm);
> > vma = find_vma(mm, addr);
> > @@ -355,6 +385,9 @@ void handle_page_fault(struct pt_regs *regs)
> >
> > mmap_read_unlock(mm);
> >
> > +#ifdef CONFIG_PER_VMA_LOCK
> > +done:
> > +#endif
> It's very close to cd7f176aea5f ("arm64/mm: try VMA lock-based page fault
> handling first"), and I didn't find any problem. So:
>
> Reviewed-by: Guo Ren <[email protected]>
Looks correct to me.
Reviewed-by: Suren Baghdasaryan <[email protected]>
>
> F.Y.I Huacai Chen, maybe he also would be interesting this new feature.
>
>
> > if (unlikely(fault & VM_FAULT_ERROR)) {
> > tsk->thread.bad_cause = cause;
> > mm_fault_error(regs, addr, fault);
> > --
> > 2.40.1
On Wed, May 24, 2023 at 01:02:59AM -0400, [email protected] wrote:
> > Attempt VMA lock-based page fault handling first, and fall back to the
> > existing mmap_lock-based handling if that fails.
> >
> > A simple running the ebizzy benchmark on Lichee Pi 4A shows that
> > PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In
> Good improvement, I think VMA lock is worth to support in riscv.
>
> Please give more details about ebizzy, Is it
> https://github.com/linux-test-project/ltp/blob/master/utils/benchmark/ebizzy-0.3/ebizzy.c
> ?
Yeah, it's one of the LTP benchmark utils.
>
> > theory, the more CPUs, the bigger improvement, but I don't have any
> > HW platform which has more than 4 CPUs.
> >
> > This is the riscv variant of "x86/mm: try VMA lock-based page fault
> > handling first".
> >
>
> How about add Link tag here:
> Link: https://lwn.net/Articles/906852/
>
> > Signed-off-by: Jisheng Zhang <[email protected]>
> > ---
> > Any performance numbers are welcome! Especially the numbers on HW
> > platforms with 8 or more CPUs.
> >
> > arch/riscv/Kconfig | 1 +
> > arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++
> > 2 files changed, 34 insertions(+)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 62e84fee2cfd..b958f67f9a12 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -42,6 +42,7 @@ config RISCV
> > select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
> > select ARCH_SUPPORTS_HUGETLBFS if MMU
> > select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
> > + select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
> > select ARCH_USE_MEMTEST
> > select ARCH_USE_QUEUED_RWLOCKS
> > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> > index 8685f85a7474..eccdddf26f4b 100644
> > --- a/arch/riscv/mm/fault.c
> > +++ b/arch/riscv/mm/fault.c
> > @@ -286,6 +286,36 @@ void handle_page_fault(struct pt_regs *regs)
> > flags |= FAULT_FLAG_WRITE;
> > else if (cause == EXC_INST_PAGE_FAULT)
> > flags |= FAULT_FLAG_INSTRUCTION;
> > +#ifdef CONFIG_PER_VMA_LOCK
> > + if (!(flags & FAULT_FLAG_USER))
> > + goto lock_mmap;
> > +
> > + vma = lock_vma_under_rcu(mm, addr);
> > + if (!vma)
> > + goto lock_mmap;
> > +
> > + if (unlikely(access_error(cause, vma))) {
> > + vma_end_read(vma);
> > + goto lock_mmap;
> > + }
> > +
> > + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
> > + vma_end_read(vma);
> > +
> > + if (!(fault & VM_FAULT_RETRY)) {
> > + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> > + goto done;
> > + }
> > + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> > +
> > + if (fault_signal_pending(fault, regs)) {
> > + if (!user_mode(regs))
> > + no_context(regs, addr);
> > + return;
> > + }
> > +lock_mmap:
> > +#endif /* CONFIG_PER_VMA_LOCK */
> > +
> > retry:
> > mmap_read_lock(mm);
> > vma = find_vma(mm, addr);
> > @@ -355,6 +385,9 @@ void handle_page_fault(struct pt_regs *regs)
> >
> > mmap_read_unlock(mm);
> >
> > +#ifdef CONFIG_PER_VMA_LOCK
> > +done:
> > +#endif
> It's very close to cd7f176aea5f ("arm64/mm: try VMA lock-based page fault
> handling first"), and I didn't find any problem. So:
>
> Reviewed-by: Guo Ren <[email protected]>
>
> F.Y.I Huacai Chen, maybe he also would be interesting this new feature.
>
>
> > if (unlikely(fault & VM_FAULT_ERROR)) {
> > tsk->thread.bad_cause = cause;
> > mm_fault_error(regs, addr, fault);
> > --
> > 2.40.1
On Wed, 24 May 2023 00:59:42 +0800, Jisheng Zhang wrote:
> Attempt VMA lock-based page fault handling first, and fall back to the
> existing mmap_lock-based handling if that fails.
>
> A simple running the ebizzy benchmark on Lichee Pi 4A shows that
> PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In
> theory, the more CPUs, the bigger improvement, but I don't have any
> HW platform which has more than 4 CPUs.
>
> [...]
Applied, thanks!
[1/1] riscv: mm: try VMA lock-based page fault handling first
https://git.kernel.org/palmer/c/648321fa0d97
Best regards,
--
Palmer Dabbelt <[email protected]>
Hello:
This patch was applied to riscv/linux.git (for-next)
by Palmer Dabbelt <[email protected]>:
On Wed, 24 May 2023 00:59:42 +0800 you wrote:
> Attempt VMA lock-based page fault handling first, and fall back to the
> existing mmap_lock-based handling if that fails.
>
> A simple running the ebizzy benchmark on Lichee Pi 4A shows that
> PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In
> theory, the more CPUs, the bigger improvement, but I don't have any
> HW platform which has more than 4 CPUs.
>
> [...]
Here is the summary with links:
- riscv: mm: try VMA lock-based page fault handling first
https://git.kernel.org/riscv/c/648321fa0d97
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
On 2023/5/24 0:59, Jisheng Zhang wrote:
> Attempt VMA lock-based page fault handling first, and fall back to the
> existing mmap_lock-based handling if that fails.
>
> A simple running the ebizzy benchmark on Lichee Pi 4A shows that
> PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In
> theory, the more CPUs, the bigger improvement, but I don't have any
> HW platform which has more than 4 CPUs.
>
> This is the riscv variant of "x86/mm: try VMA lock-based page fault
> handling first".
>
> Signed-off-by: Jisheng Zhang <[email protected]>
> ---
> Any performance numbers are welcome! Especially the numbers on HW
> platforms with 8 or more CPUs.
>
> arch/riscv/Kconfig | 1 +
> arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++
> 2 files changed, 34 insertions(+)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 62e84fee2cfd..b958f67f9a12 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -42,6 +42,7 @@ config RISCV
> select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
> select ARCH_SUPPORTS_HUGETLBFS if MMU
> select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
> + select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
No need for "if MMU" here; PER_VMA_LOCK itself already depends on MMU, see:
config PER_VMA_LOCK
bool "allow VMA lock-based page fault"
def_bool y
depends on ARCH_SUPPORTS_PER_VMA_LOCK && MMU && SMP
Reviewed-by: Kefeng Wang <[email protected]>
> select ARCH_USE_MEMTEST
> select ARCH_USE_QUEUED_RWLOCKS
> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> index 8685f85a7474..eccdddf26f4b 100644
> --- a/arch/riscv/mm/fault.c
> +++ b/arch/riscv/mm/fault.c
> @@ -286,6 +286,36 @@ void handle_page_fault(struct pt_regs *regs)
> flags |= FAULT_FLAG_WRITE;
> else if (cause == EXC_INST_PAGE_FAULT)
> flags |= FAULT_FLAG_INSTRUCTION;
> +#ifdef CONFIG_PER_VMA_LOCK
> + if (!(flags & FAULT_FLAG_USER))
> + goto lock_mmap;
> +
> + vma = lock_vma_under_rcu(mm, addr);
> + if (!vma)
> + goto lock_mmap;
> +
> + if (unlikely(access_error(cause, vma))) {
> + vma_end_read(vma);
> + goto lock_mmap;
> + }
> +
> + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
> + vma_end_read(vma);
> +
> + if (!(fault & VM_FAULT_RETRY)) {
> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> + goto done;
> + }
> + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> +
> + if (fault_signal_pending(fault, regs)) {
> + if (!user_mode(regs))
> + no_context(regs, addr);
> + return;
> + }
> +lock_mmap:
> +#endif /* CONFIG_PER_VMA_LOCK */
> +
> retry:
> mmap_read_lock(mm);
> vma = find_vma(mm, addr);
> @@ -355,6 +385,9 @@ void handle_page_fault(struct pt_regs *regs)
>
> mmap_read_unlock(mm);
>
> +#ifdef CONFIG_PER_VMA_LOCK
> +done:
> +#endif
> if (unlikely(fault & VM_FAULT_ERROR)) {
> tsk->thread.bad_cause = cause;
> mm_fault_error(regs, addr, fault);