This patch series is meant to address several issues I encountered with VM
translations on x86_64. In my testing I found that swiotlb was incurring up
to a 5% processing overhead due to calls to __phys_addr. To address that I
have updated swiotlb to use physical addresses instead of virtual addresses
to reduce the need to call __phys_addr. However, those patches didn't address
the other callers. With these patches applied I am able to achieve an
additional 1% to 2% performance gain on top of the changes to swiotlb.
The first 2 patches are the performance optimizations that result in the 1% to
2% increase in overall performance. The remaining patches are cleanups for a
number of spots where __pa or virt_to_phys was being called unnecessarily, or
where __pa_symbol could have been used instead.
---
Alexander Duyck (8):
x86/lguest: Use __pa_symbol instead of __pa on C visible symbols
x86/acpi: Use __pa_symbol instead of __pa on C visible symbols
x86/xen: Use __pa_symbol instead of __pa on C visible symbols
x86/ftrace: Use __pa_symbol instead of __pa on C visible symbols
x86: Use __pa_symbol instead of __pa on C visible symbols
x86: Drop 4 unnecessary calls to __pa_symbol
x86: Make it so that __pa_symbol can only process kernel symbols on x86_64
x86: Improve __phys_addr performance by making use of carry flags and inlining
arch/x86/include/asm/page.h | 3 ++-
arch/x86/include/asm/page_32.h | 1 +
arch/x86/include/asm/page_64_types.h | 20 +++++++++++++++++--
arch/x86/kernel/acpi/sleep.c | 2 +-
arch/x86/kernel/cpu/intel.c | 2 +-
arch/x86/kernel/ftrace.c | 4 ++--
arch/x86/kernel/head32.c | 4 ++--
arch/x86/kernel/head64.c | 4 ++--
arch/x86/kernel/setup.c | 16 ++++++++--------
arch/x86/kernel/x8664_ksyms_64.c | 3 +++
arch/x86/lguest/boot.c | 3 ++-
arch/x86/mm/pageattr.c | 8 ++++----
arch/x86/mm/physaddr.c | 35 ++++++++++++++++++++++++++++------
arch/x86/platform/efi/efi.c | 4 ++--
arch/x86/realmode/init.c | 8 ++++----
arch/x86/xen/mmu.c | 19 ++++++++++--------
16 files changed, 91 insertions(+), 45 deletions(-)
--
I submitted an earlier patch that makes __phys_addr an inline function. This
obviously results in an increase in code size. One step I can take to reduce
that is to make the __pa_symbol call do a direct translation for kernel
addresses instead of covering all of virtual memory.
On my system this reduced the size for __pa_symbol from 5 instructions
totalling 30 bytes to 3 instructions totalling 16 bytes.
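For illustration (a sketch, not part of the patch; the helper names here are
made up), the saving comes from dropping the conditional that selects between
the kernel text mapping and the linear mapping:

/* general translation: must handle both kernel text and the linear map */
static inline unsigned long generic_phys_addr(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	return y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
}

/* symbol-only translation: kernel text is assumed, so the check drops out */
static inline unsigned long symbol_phys_addr(unsigned long x)
{
	return x - __START_KERNEL_map + phys_base;
}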
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/include/asm/page.h | 3 ++-
arch/x86/include/asm/page_32.h | 1 +
arch/x86/include/asm/page_64_types.h | 3 +++
arch/x86/mm/physaddr.c | 15 +++++++++++++++
4 files changed, 21 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 8ca8283..3698a6a 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -44,7 +44,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
* case properly. Once all supported versions of gcc understand it, we can
* remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
*/
-#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x)))
+#define __pa_symbol(x) \
+ __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index da4e762..4d550d0 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x) __phys_addr_nodebug(x)
#endif
+#define __phys_addr_symbol(x) __phys_addr(x)
#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
#ifdef CONFIG_FLATMEM
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 1ca93d3..a130589 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -69,8 +69,11 @@ static inline unsigned long __phys_addr_nodebug(unsigned long x)
}
#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
+extern unsigned long __phys_addr_symbol(unsigned long);
#else
#define __phys_addr(x) __phys_addr_nodebug(x)
+#define __phys_addr_symbol(x) \
+ ((unsigned long)(x) - __START_KERNEL_map + phys_base)
#endif
#define __phys_reloc_hide(x) (x)
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index f63bec5..666edbd 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -29,6 +29,21 @@ unsigned long __phys_addr(unsigned long x)
return x;
}
EXPORT_SYMBOL(__phys_addr);
+
+unsigned long __phys_addr_symbol(unsigned long x)
+{
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ VIRTUAL_BUG_ON(x < y);
+
+ x = y + phys_base;
+
+ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+
+ return x;
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
#endif
bool __virt_addr_valid(unsigned long x)
When I made an attempt at separating __pa_symbol and __pa I found a number of
cases where __pa was used on an obvious symbol. I also caught one non-obvious
case: _brk_start and _brk_end are based on the address of __brk_base, which is
a C visible symbol.
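For reference, that is because of how the brk variables are initialized in
arch/x86/kernel/setup.c (quoting from memory, so treat this as illustrative):

unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end   = (unsigned long)__brk_base;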
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/kernel/cpu/intel.c | 2 +-
arch/x86/kernel/setup.c | 16 ++++++++--------
arch/x86/mm/pageattr.c | 8 ++++----
arch/x86/platform/efi/efi.c | 4 ++--
arch/x86/realmode/init.c | 8 ++++----
5 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 198e019..2249e7e 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -168,7 +168,7 @@ int __cpuinit ppro_with_ram_bug(void)
#ifdef CONFIG_X86_F00F_BUG
static void __cpuinit trap_init_f00f_bug(void)
{
- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+ __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
/*
* Update the IDT descriptor and reload the IDT so that
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d609be0..391f5f4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -299,8 +299,8 @@ static void __init cleanup_highmap(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- memblock_reserve(__pa(_brk_start),
- __pa(_brk_end) - __pa(_brk_start));
+ memblock_reserve(__pa_symbol(_brk_start),
+ _brk_end - _brk_start);
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -760,12 +760,12 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
- code_resource.start = virt_to_phys(_text);
- code_resource.end = virt_to_phys(_etext)-1;
- data_resource.start = virt_to_phys(_etext);
- data_resource.end = virt_to_phys(_edata)-1;
- bss_resource.start = virt_to_phys(&__bss_start);
- bss_resource.end = virt_to_phys(&__bss_stop)-1;
+ code_resource.start = __pa_symbol(_text);
+ code_resource.end = __pa_symbol(_etext)-1;
+ data_resource.start = __pa_symbol(_etext);
+ data_resource.end = __pa_symbol(_edata)-1;
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop)-1;
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d..40f92f3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { }
static inline unsigned long highmap_start_pfn(void)
{
- return __pa(_text) >> PAGE_SHIFT;
+ return __pa_symbol(_text) >> PAGE_SHIFT;
}
static inline unsigned long highmap_end_pfn(void)
{
- return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+ return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
}
#endif
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
* The .rodata section needs to be read-only. Using the pfn
* catches all aliases.
*/
- if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
- __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+ if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+ __pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index aded2a9..e8d0320 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -406,8 +406,8 @@ void __init efi_reserve_boot_services(void)
* - Not within any part of the kernel
* - Not the bios reserved area
*/
- if ((start+size >= virt_to_phys(_text)
- && start <= virt_to_phys(_end)) ||
+ if ((start+size >= __pa_symbol(_text)
+ && start <= __pa_symbol(_end)) ||
!e820_all_mapped(start, start+size, E820_RAM) ||
memblock_is_region_reserved(start, size)) {
/* Could not reserve, skip it */
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cbca565..8045026 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -62,9 +62,9 @@ void __init setup_real_mode(void)
__va(real_mode_header->trampoline_header);
#ifdef CONFIG_X86_32
- trampoline_header->start = __pa(startup_32_smp);
+ trampoline_header->start = __pa_symbol(startup_32_smp);
trampoline_header->gdt_limit = __BOOT_DS + 7;
- trampoline_header->gdt_base = __pa(boot_gdt);
+ trampoline_header->gdt_base = __pa_symbol(boot_gdt);
#else
/*
* Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
@@ -78,8 +78,8 @@ void __init setup_real_mode(void)
*trampoline_cr4_features = read_cr4();
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
- trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE;
- trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE;
+ trampoline_pgd[0] = __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE;
+ trampoline_pgd[511] = __pa_symbol(level3_kernel_pgt) + _KERNPG_TABLE;
#endif
}
Instead of using __pa, which is meant to be a general function for converting
virtual addresses to physical addresses, we can use __pa_symbol, which is the
preferred way of decoding kernel text virtual addresses to physical addresses.
In this case we are not directly converting C visible symbols; however, if we
know that the instruction pointer is somewhere between _text and _etext, we
know that we are translating an address from the kernel text space.
Cc: Steven Rostedt <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/kernel/ftrace.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d41402..42a392a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
- ip = (unsigned long)__va(__pa(ip));
+ ip = (unsigned long)__va(__pa_symbol(ip));
return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
- ip = (unsigned long)__va(__pa(ip));
+ ip = (unsigned long)__va(__pa_symbol(ip));
return probe_kernel_write((void *)ip, val, size);
}
This change updates a few of the functions to use __pa_symbol instead of __pa
when translating C visible symbols. By using __pa_symbol we are able to drop a
few extra lines of code, as we don't have to test whether the virtual pointer
is part of the kernel text or just standard virtual memory.
Cc: Konrad Rzeszutek Wilk <[email protected]>
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/xen/mmu.c | 19 ++++++++++---------
1 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fd28d86..c50a87e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1449,7 +1449,8 @@ static int xen_pgd_alloc(struct mm_struct *mm)
if (user_pgd != NULL) {
user_pgd[pgd_index(VSYSCALL_START)] =
- __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+ __pgd(__pa_symbol(level3_user_vsyscall) |
+ _PAGE_TABLE);
ret = 0;
}
@@ -1908,7 +1909,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
* pgd.
*/
xen_mc_batch();
- __xen_write_cr3(true, __pa(init_level4_pgt));
+ __xen_write_cr3(true, __pa_symbol(init_level4_pgt));
xen_mc_issue(PARAVIRT_LAZY_CPU);
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
@@ -1931,10 +1932,10 @@ static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
static void __init xen_write_cr3_init(unsigned long cr3)
{
- unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
+ unsigned long pfn = PFN_DOWN(__pa_symbol(swapper_pg_dir));
- BUG_ON(read_cr3() != __pa(initial_page_table));
- BUG_ON(cr3 != __pa(swapper_pg_dir));
+ BUG_ON(read_cr3() != __pa_symbol(initial_page_table));
+ BUG_ON(cr3 != __pa_symbol(swapper_pg_dir));
/*
* We are switching to swapper_pg_dir for the first time (from
@@ -1958,7 +1959,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
+ PFN_DOWN(__pa_symbol(initial_page_table)));
set_page_prot(initial_page_table, PAGE_KERNEL);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
@@ -1983,7 +1984,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
copy_page(initial_page_table, pgd);
initial_page_table[KERNEL_PGD_BOUNDARY] =
- __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
+ __pgd(__pa_symbol(initial_kernel_pmd) | _PAGE_PRESENT);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
set_page_prot(initial_page_table, PAGE_KERNEL_RO);
@@ -1992,8 +1993,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- xen_write_cr3(__pa(initial_page_table));
+ PFN_DOWN(__pa_symbol(initial_page_table)));
+ xen_write_cr3(__pa_symbol(initial_page_table));
memblock_reserve(__pa(xen_start_info->pt_base),
xen_start_info->nr_pt_frames * PAGE_SIZE);
This change just updates one spot where __pa was being used when __pa_symbol
should have been used. By using __pa_symbol we are able to drop a few extra
lines of code, as we don't have to test whether the virtual pointer is part of
the kernel text or just standard virtual memory.
Cc: Len Brown <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/kernel/acpi/sleep.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 11676cf..f146a3c 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void)
#ifndef CONFIG_64BIT
header->pmode_entry = (u32)&wakeup_pmode_return;
- header->pmode_cr3 = (u32)__pa(&initial_page_table);
+ header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
The function lguest_write_cr3 is using __pa to convert swapper_pg_dir and
initial_page_table from virtual addresses to physical. The correct function
to use for these values is __pa_symbol since they are C visible symbols.
Cc: Rusty Russell <[email protected]>
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/lguest/boot.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 642d880..139dd35 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3)
current_cr3 = cr3;
/* These two page tables are simple, linear, and used during boot */
- if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+ if (cr3 != __pa_symbol(swapper_pg_dir) &&
+ cr3 != __pa_symbol(initial_page_table))
cr3_changed = true;
}
While debugging the __pa_symbol inline patch I found a couple of spots where
__pa_symbol was used as follows:

	__pa_symbol(x) - __pa_symbol(y)

The compiler had reduced them to:

	x - y

Since we also support a debug case where __pa_symbol is a function call, it
would probably be useful to just change the two cases I found so that they are
always treated as "x - y". As such I am casting the values to phys_addr_t and
then doing simple subtraction so that the correct type and value is returned.
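For reference, with the non-debug definition of __pa_symbol the reduction the
compiler performs is easy to see, since the constant offsets cancel:

	__pa_symbol(x) - __pa_symbol(y)
	  = (x - __START_KERNEL_map + phys_base)
	  - (y - __START_KERNEL_map + phys_base)
	  = x - y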
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/kernel/head32.c | 4 ++--
arch/x86/kernel/head64.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d..f15db0c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -30,8 +30,8 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
- memblock_reserve(__pa_symbol(&_text),
- __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
+ memblock_reserve(__pa_symbol(_text),
+ (phys_addr_t)__bss_stop - (phys_addr_t)_text);
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57..42f5df1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -97,8 +97,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));
- memblock_reserve(__pa_symbol(&_text),
- __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
+ memblock_reserve(__pa_symbol(_text),
+ (phys_addr_t)__bss_stop - (phys_addr_t)_text);
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
This patch is meant to improve overall system performance when making use of
the __phys_addr call. To do this I have implemented several changes.
First, if CONFIG_DEBUG_VIRTUAL is not defined, __phys_addr is made an inline,
similar to how this is currently handled on 32-bit. However, in order to do
this phys_base must be exported so that it is available if __phys_addr is used
in kernel modules.
The second change was to streamline the code by making use of the carry flag
on an add operation instead of performing a compare on a 64-bit value. The
advantage to this is that it allows us to significantly reduce the overall
size of the call. On my Xeon E5 system the entire __phys_addr inline call
consumes a little less than 32 bytes and 5 instructions. I also applied
similar logic to the debug version of the function. My testing shows that the
debug version of the function with this patch applied is slightly faster than
the non-debug version without the patch.
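To spell the trick out (an illustrative snippet, not part of the diff below):
after subtracting __START_KERNEL_map, comparing the result against the
original value is equivalent to the original range check, because unsigned
subtraction wraps:

	unsigned long y = x - __START_KERNEL_map;

	/*
	 * x >= __START_KERNEL_map: no borrow, so y < x  ->  x > y
	 * x <  __START_KERNEL_map: borrow, y wraps high ->  x < y
	 *
	 * So "if (x > y)" tests x >= __START_KERNEL_map via the carry
	 * flag of the subtract, without loading the 64-bit constant a
	 * second time for a full compare.
	 */
	if (x > y)
		x = y + phys_base;	/* kernel text mapping */
	else
		x = y + (__START_KERNEL_map - PAGE_OFFSET);	/* linear map */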
Finally, when building the kernel with the first two changes applied I saw
build warnings about __START_KERNEL_map and PAGE_OFFSET constants not fitting
in their type. In order to resolve the build warnings I changed their type
from UL to ULL.
Signed-off-by: Alexander Duyck <[email protected]>
---
arch/x86/include/asm/page_64_types.h | 17 +++++++++++++++--
arch/x86/kernel/x8664_ksyms_64.c | 3 +++
arch/x86/mm/physaddr.c | 20 ++++++++++++++------
3 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 320f7bb..1ca93d3 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -30,14 +30,14 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
-#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
+#define __PAGE_OFFSET _AC(0xffff880000000000, ULL)
#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \
(CONFIG_PHYSICAL_ALIGN - 1)) & \
~(CONFIG_PHYSICAL_ALIGN - 1))
#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
+#define __START_KERNEL_map _AC(0xffffffff80000000, ULL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 46
@@ -58,7 +58,20 @@ void copy_page(void *to, void *from);
extern unsigned long max_pfn;
extern unsigned long phys_base;
+static inline unsigned long __phys_addr_nodebug(unsigned long x)
+{
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
+
+ return x;
+}
+#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
+#else
+#define __phys_addr(x) __phys_addr_nodebug(x)
+#endif
#define __phys_reloc_hide(x) (x)
#define vmemmap ((struct page *)VMEMMAP_START)
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1330dd1..b014d94 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(memmove);
+#ifndef CONFIG_DEBUG_VIRTUAL
+EXPORT_SYMBOL(phys_base);
+#endif
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
EXPORT_SYMBOL(native_load_gs_index);
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index d2e2735..f63bec5 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -8,20 +8,28 @@
#ifdef CONFIG_X86_64
+#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
- x += phys_base;
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ if (unlikely(x > y)) {
+ x = y + phys_base;
+
+ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
} else {
- VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- x -= PAGE_OFFSET;
+ x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+ /* carry flag will be set if starting x was >= PAGE_OFFSET */
+ VIRTUAL_BUG_ON(x > y);
VIRTUAL_BUG_ON(!phys_addr_valid(x));
}
+
return x;
}
EXPORT_SYMBOL(__phys_addr);
+#endif
bool __virt_addr_valid(unsigned long x)
{
Patch series looks good to me. Thanks for doing this properly.
Reviewed-by: Andi Kleen <[email protected]>
-Andi
--
[email protected] -- Speaking for myself only.
On 10/12/2012 06:40 AM, Andi Kleen wrote:
>
> Patch series looks good to me. Thanks for doing this properly.
> Reviewed-by: Andi Kleen <[email protected]>
>
Agreed.
Acked-by: H. Peter Anvin <[email protected]>
I will pick this up after the merge window closes unless Ingo beats me
to it. (I'm currently traveling.)
-hpa
Alexander Duyck <[email protected]> writes:
> The function lguest_write_cr3 is using __pa to convert swapper_pg_dir and
> initial_page_table from virtual addresses to physical. The correct function
> to use for these values is __pa_symbol since they are C visible symbols.
>
> Cc: Rusty Russell <[email protected]>
> Signed-off-by: Alexander Duyck <[email protected]>
Acked-by: Rusty Russell <[email protected]>
Thanks,
Rusty.
On Thu, Oct 11, 2012 at 4:49 PM, Alexander Duyck
<[email protected]> wrote:
> This patch series is meant to address several issues I encountered with VM
> translations on x86_64. In my testing I found that swiotlb was incurring up
> to a 5% processing overhead due to calls to __phys_addr. To address that I
> have updated swiotlb to use physical addresses instead of virtual addresses
> to reduce the need to call __phys_addr. However, those patches didn't address
> the other callers. With these patches applied I am able to achieve an
> additional 1% to 2% performance gain on top of the changes to swiotlb.
>
> The first 2 patches are the performance optimizations that result in the 1% to
> 2% increase in overall performance. The remaining patches are cleanups for a
> number of spots where __pa or virt_to_phys was being called unnecessarily, or
> where __pa_symbol could have been used instead.
>
Could you also add a blurb in the Documentation/<appropriate file
for device driver writers> mentioning that the usage of __pa_symbol is
preferred?
> ---
>
> Alexander Duyck (8):
> x86/lguest: Use __pa_symbol instead of __pa on C visible symbols
> x86/acpi: Use __pa_symbol instead of __pa on C visible symbols
> x86/xen: Use __pa_symbol instead of __pa on C visible symbols
> x86/ftrace: Use __pa_symbol instead of __pa on C visible symbols
> x86: Use __pa_symbol instead of __pa on C visible symbols
> x86: Drop 4 unnecessary calls to __pa_symbol
> x86: Make it so that __pa_symbol can only process kernel symbols on x86_64
> x86: Improve __phys_addr performance by making use of carry flags and inlining
>
>
> arch/x86/include/asm/page.h | 3 ++-
> arch/x86/include/asm/page_32.h | 1 +
> arch/x86/include/asm/page_64_types.h | 20 +++++++++++++++++--
> arch/x86/kernel/acpi/sleep.c | 2 +-
> arch/x86/kernel/cpu/intel.c | 2 +-
> arch/x86/kernel/ftrace.c | 4 ++--
> arch/x86/kernel/head32.c | 4 ++--
> arch/x86/kernel/head64.c | 4 ++--
> arch/x86/kernel/setup.c | 16 ++++++++--------
> arch/x86/kernel/x8664_ksyms_64.c | 3 +++
> arch/x86/lguest/boot.c | 3 ++-
> arch/x86/mm/pageattr.c | 8 ++++----
> arch/x86/mm/physaddr.c | 35 ++++++++++++++++++++++++++++------
> arch/x86/platform/efi/efi.c | 4 ++--
> arch/x86/realmode/init.c | 8 ++++----
> arch/x86/xen/mmu.c | 19 ++++++++++--------
> 16 files changed, 91 insertions(+), 45 deletions(-)
>
> --
>
> Could you also add a blurb in the Documentation/<appropriate file
> for device driver writers> mentioning that the usage of __pa_symbol is
> preferred?
Device driver writers shouldn't use any of this anyway; they should always
use the PCI DMA APIs and never DMA to the stack or to static variables.
-Andi
On Thu, Oct 11, 2012 at 01:50:23PM -0700, Alexander Duyck wrote:
> This change updates a few of the functions to use __pa_symbol instead of __pa
> when translating C visible symbols. By using __pa_symbol we are able to drop a
> few extra lines of code, as we don't have to test whether the virtual pointer
> is part of the kernel text or just standard virtual memory.
>
> Cc: Konrad Rzeszutek Wilk <[email protected]>
Acked-and-Tested-by: Konrad Rzeszutek Wilk <[email protected]>
> Signed-off-by: Alexander Duyck <[email protected]>
> ---
>
> arch/x86/xen/mmu.c | 19 ++++++++++---------
> 1 files changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index fd28d86..c50a87e 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1449,7 +1449,8 @@ static int xen_pgd_alloc(struct mm_struct *mm)
>
> if (user_pgd != NULL) {
> user_pgd[pgd_index(VSYSCALL_START)] =
> - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
> + __pgd(__pa_symbol(level3_user_vsyscall) |
> + _PAGE_TABLE);
> ret = 0;
> }
>
> @@ -1908,7 +1909,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
> * pgd.
> */
> xen_mc_batch();
> - __xen_write_cr3(true, __pa(init_level4_pgt));
> + __xen_write_cr3(true, __pa_symbol(init_level4_pgt));
> xen_mc_issue(PARAVIRT_LAZY_CPU);
>
> /* We can't that easily rip out L3 and L2, as the Xen pagetables are
> @@ -1931,10 +1932,10 @@ static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
>
> static void __init xen_write_cr3_init(unsigned long cr3)
> {
> - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
> + unsigned long pfn = PFN_DOWN(__pa_symbol(swapper_pg_dir));
>
> - BUG_ON(read_cr3() != __pa(initial_page_table));
> - BUG_ON(cr3 != __pa(swapper_pg_dir));
> + BUG_ON(read_cr3() != __pa_symbol(initial_page_table));
> + BUG_ON(cr3 != __pa_symbol(swapper_pg_dir));
>
> /*
> * We are switching to swapper_pg_dir for the first time (from
> @@ -1958,7 +1959,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
> pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
>
> pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
> - PFN_DOWN(__pa(initial_page_table)));
> + PFN_DOWN(__pa_symbol(initial_page_table)));
> set_page_prot(initial_page_table, PAGE_KERNEL);
> set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
>
> @@ -1983,7 +1984,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>
> copy_page(initial_page_table, pgd);
> initial_page_table[KERNEL_PGD_BOUNDARY] =
> - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
> + __pgd(__pa_symbol(initial_kernel_pmd) | _PAGE_PRESENT);
>
> set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
> set_page_prot(initial_page_table, PAGE_KERNEL_RO);
> @@ -1992,8 +1993,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
> pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
>
> pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
> - PFN_DOWN(__pa(initial_page_table)));
> - xen_write_cr3(__pa(initial_page_table));
> + PFN_DOWN(__pa_symbol(initial_page_table)));
> + xen_write_cr3(__pa_symbol(initial_page_table));
>
> memblock_reserve(__pa(xen_start_info->pt_base),
> xen_start_info->nr_pt_frames * PAGE_SIZE);
On 10/12/2012 08:15 AM, Andi Kleen wrote:
>> Could you also add a blurb in the Documentation/< appropriate file
>> for device driver writes> mentioning the usage of __pa_symbol is
>> preferred?
> Device driver writers shouldn't use any of this anyway; they should always
> use the PCI DMA APIs and never DMA to the stack or to static variables.
>
> -Andi
__pa_symbol is very architecture-specific. From what I can tell it only
exists for the x86 and MIPS architectures. If a device driver is expected to
function on other architectures such as PowerPC, you cannot use it.
Everything I have read indicates that for virtual to physical translation in
drivers it is preferred to use virt_to_phys, not __pa or __pa_symbol.
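To make that concrete, here is a rough sketch (function name and error
handling are made up for illustration) of the portable pattern for a driver,
which goes through the DMA API rather than any direct physical translation:

#include <linux/dma-mapping.h>

/* map a driver-owned buffer and return the bus address for the device */
static dma_addr_t example_map_buf(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, handle))
		return 0;	/* simplified error convention for this sketch */

	return handle;	/* program this into the device, not __pa(buf) */
}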
Thanks,
Alex
On 10/11/2012 03:58 PM, H. Peter Anvin wrote:
> On 10/12/2012 06:40 AM, Andi Kleen wrote:
>> Patch series looks good to me. Thanks for doing this properly.
>> Reviewed-by: Andi Kleen <[email protected]>
>>
> Agreed.
>
> Acked-by: H. Peter Anvin <[email protected]>
>
> I will pick this up after the merge window closes unless Ingo beats me
> to it. (I'm currently traveling.)
>
> -hpa
>
>
I was wondering if this ever got picked up? If so, is there a public
tree I could find them in?
Just wondering since I have some minor changes I would like to make, and I
wanted to figure out whether I should rework the patches or submit the
changes as a follow-on.
Thanks,
Alex