Hi,
copy_{from,to}_user() uaccess helpers are implemented by pinning the user
pages, temporarily mapping them into the kernel & then doing a memcpy().
This makes it possible to copy to/from user pages when the current virtual
address mapping of the CPU excludes user pages.
Other uaccess routines are also planned to be modified to make use of
pinning plus kmap_atomic() based on the feedback here.
This is done as one of the initial steps to achieve 4G virtual
address mapping for user as well as Kernel on ARMv7 w/ LPAE.
The motive behind this is to let the Kernel access (almost) 4GiB as
lowmem, thus helping in removing highmem support for platforms having
up to 4GiB RAM. In the case of platforms having >4GiB, highmem is still
required for the Kernel to be able to access the whole RAM.
Performance wise, results are not encouraging, 'dd' on tmpfs results,
ARM Cortex-A8, BeagleBone White (256MiB RAM):
w/o series - ~29.5 MB/s
w/ series - ~20.5 MB/s
w/ series & highmem disabled - ~21.2 MB/s
On Cortex-A15(2GiB RAM) in QEMU:
w/o series - ~4 MB/s
w/ series - ~2.6 MB/s
Roughly a one-third drop in performance. Disabling highmem improves
performance only slightly.
'hackbench' also showed a similar pattern.
Ways to improve the performance have to be explored; if anyone has
thoughts on it, please share.
uaccess routines using page pinning & temporary kernel mapping are not
something new; this was done by Ingo a long time ago [1] as part of the
4G/4G user/kernel mapping implementation on x86, though it was not merged
in mainline.
Arnd has outlined basic design for vmsplit 4g/4g, uaccess routines
using user page pinning plus kmap_atomic() is one part of that.
[1] https://lore.kernel.org/lkml/[email protected]/
The last 2 patches are only meant for testing the first patch.
Regards
afzal
afzal mohammed (3):
lib: copy_{from,to}_user using gup & kmap_atomic()
ARM: uaccess: let UACCESS_GUP_KMAP_MEMCPY enabling
ARM: provide CONFIG_VMSPLIT_4G_DEV for development
arch/arm/Kconfig | 9 ++
arch/arm/include/asm/uaccess.h | 20 ++++
arch/arm/kernel/armksyms.c | 2 +
arch/arm/lib/Makefile | 7 +-
lib/Kconfig | 4 +
lib/Makefile | 3 +
lib/uaccess_gup_kmap_memcpy.c | 162 +++++++++++++++++++++++++++++++++
7 files changed, 205 insertions(+), 2 deletions(-)
create mode 100644 lib/uaccess_gup_kmap_memcpy.c
--
2.26.2
Select UACCESS_GUP_KMAP_MEMCPY initially.
Signed-off-by: afzal mohammed <[email protected]>
---
arch/arm/Kconfig | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c77c93c485a08..ae2687679d7c8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1326,6 +1326,15 @@ config PAGE_OFFSET
default 0xB0000000 if VMSPLIT_3G_OPT
default 0xC0000000
+config VMSPLIT_4G_DEV
+ bool "Experimental changes for 4G/4G user/kernel split"
+ depends on ARM_LPAE
+ select UACCESS_GUP_KMAP_MEMCPY
+ help
+ Experimental changes during 4G/4G user/kernel split development.
+ Existing vmsplit config option is used, once development is done,
+ this would be put as a new choice & _DEV suffix removed.
+
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
--
2.26.2
Disable the existing raw_copy_{from,to}_user() implementations, which
use arm_copy_{from,to}_user(), when CONFIG_UACCESS_GUP_KMAP_MEMCPY is
enabled.
Signed-off-by: afzal mohammed <[email protected]>
---
arch/arm/include/asm/uaccess.h | 20 ++++++++++++++++++++
arch/arm/kernel/armksyms.c | 2 ++
arch/arm/lib/Makefile | 7 +++++--
3 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 98c6b91be4a8a..4a16ae52d4978 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -512,6 +512,15 @@ do { \
extern unsigned long __must_check
arm_copy_from_user(void *to, const void __user *from, unsigned long n);
+#ifdef CONFIG_UACCESS_GUP_KMAP_MEMCPY
+extern unsigned long __must_check
+gup_kmap_copy_from_user(void *to, const void __user *from, unsigned long n);
+static inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return gup_kmap_copy_from_user(to, from, n);
+}
+#else
static inline unsigned long __must_check
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
@@ -522,12 +531,22 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
uaccess_restore(__ua_flags);
return n;
}
+#endif
extern unsigned long __must_check
arm_copy_to_user(void __user *to, const void *from, unsigned long n);
extern unsigned long __must_check
__copy_to_user_std(void __user *to, const void *from, unsigned long n);
+#ifdef CONFIG_UACCESS_GUP_KMAP_MEMCPY
+extern unsigned long __must_check
+gup_kmap_copy_to_user(void __user *to, const void *from, unsigned long n);
+static inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return gup_kmap_copy_to_user(to, from, n);
+}
+#else
static inline unsigned long __must_check
raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
@@ -541,6 +560,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return arm_copy_to_user(to, from, n);
#endif
}
+#endif
extern unsigned long __must_check
arm_clear_user(void __user *addr, unsigned long n);
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 98bdea51089d5..8c92fe30d1559 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -96,8 +96,10 @@ EXPORT_SYMBOL(mmiocpy);
#ifdef CONFIG_MMU
EXPORT_SYMBOL(copy_page);
+#ifndef CONFIG_UACCESS_GUP_KMAP_MEMCPY
EXPORT_SYMBOL(arm_copy_from_user);
EXPORT_SYMBOL(arm_copy_to_user);
+#endif
EXPORT_SYMBOL(arm_clear_user);
EXPORT_SYMBOL(__get_user_1);
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 6d2ba454f25b6..1aeff2cd7b4b3 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -16,8 +16,11 @@ lib-y := changebit.o csumipv6.o csumpartial.o \
io-readsb.o io-writesb.o io-readsl.o io-writesl.o \
call_with_stack.o bswapsdi2.o
-mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
- copy_from_user.o copy_to_user.o
+mmu-y := clear_user.o copy_page.o getuser.o putuser.o
+
+ifndef CONFIG_UACCESS_GUP_KMAP_MEMCPY
+ mmu-y += copy_from_user.o copy_to_user.o
+endif
ifdef CONFIG_CC_IS_CLANG
lib-y += backtrace-clang.o
--
2.26.2
On Fri, 12 Jun 2020, afzal mohammed wrote:
> Performance wise, results are not encouraging, 'dd' on tmpfs results,
>
> ARM Cortex-A8, BeagleBone White (256MiB RAM):
> w/o series - ~29.5 MB/s
> w/ series - ~20.5 MB/s
> w/ series & highmem disabled - ~21.2 MB/s
>
> On Cortex-A15(2GiB RAM) in QEMU:
> w/o series - ~4 MB/s
> w/ series - ~2.6 MB/s
>
> Roughly a one-third drop in performance. Disabling highmem improves
> performance only slightly.
Could you compare with CONFIG_UACCESS_WITH_MEMCPY as well?
Nicolas
Hi,
On Fri, Jun 12, 2020 at 11:19:23AM -0400, Nicolas Pitre wrote:
> On Fri, 12 Jun 2020, afzal mohammed wrote:
> > Performance wise, results are not encouraging, 'dd' on tmpfs results,
> Could you compare with CONFIG_UACCESS_WITH_MEMCPY as well?
512 1K 4K 16K 32K 64K 1M
normal 30 46 89 95 90 85 65
uaccess_w_memcpy 28.5 45 85 92 91 85 65
w/ series 22 36 72 79 78 75 61
Some readings vary by about +/-2 when the runs are repeated; those
variations are not shown above, to keep the comparison simple.
Regards
afzal
Hi,
On Fri, Jun 12, 2020 at 09:31:12PM +0530, afzal mohammed wrote:
> 512 1K 4K 16K 32K 64K 1M
>
> normal 30 46 89 95 90 85 65
>
> uaccess_w_memcpy 28.5 45 85 92 91 85 65
>
> w/ series 22 36 72 79 78 75 61
For the sake of completeness: all values are in MB/s, w/ various 'dd' 'bs' sizes.
Regards
afzal