Hi,
Currently, the page tables created via memremap_pages() always use
the PAGE_KERNEL caching mode. However, the P2PDMA code creates pages
for PCI BAR memory, which should never be accessed through the cache;
it should instead be mapped WC or UC. This still works in most cases
on x86 because the MTRR registers typically override the caching
settings in the page tables, marking all of the I/O memory UC-.
However, this tends not to work so well on other arches, or on the
rare x86 machines whose firmware does not set up the MTRRs this way.
Instead, this series changes arch_add_memory() to take the pgprot
required by the mapping, which allows us to explicitly set the page
table entries for P2PDMA memory to WC.
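To illustrate the end result, the P2PDMA hunk in mm/memremap.c ends up
looking roughly like the sketch below (hand-written for this cover
letter rather than copied from patch 6, so the surrounding variable
names are assumptions):

	/*
	 * Sketch: pick the caching mode for the new device pages.
	 * PCI P2PDMA BAR memory must not be mapped cacheable, so
	 * request write-combining; all other users keep the
	 * PAGE_KERNEL default.
	 */
	pgprot_t prot = PAGE_KERNEL;

	if (pgmap->type == MEMORY_DEVICE_PCI_P2PDMA)
		prot = pgprot_writecombine(PAGE_KERNEL);

	error = arch_add_memory(nid, res->start, resource_size(res),
				prot, &restrictions);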
This change is fairly routine for most of the arches: x86_64, s390,
arm64 and powerpc simply need to thread the pgprot through to where
the page tables are set up. x86_32 unfortunately sets up its page
tables at boot, so it must use _set_memory_prot() to change their
caching mode. ia64 and sh don't appear to have an easy way to change
the page tables, so, for now at least, we just return -EINVAL on such
mappings; they will not support P2PDMA memory until that work is done.
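For ia64 and sh, that rejection amounts to something like the
following at the top of their arch_add_memory() (a sketch only; the
exact signature depends on how patch 5 passes the pgprot):

	/*
	 * These page tables cannot easily be changed after boot, so
	 * only the default cached mapping is supported for now.
	 */
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL))
		return -EINVAL;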
Thanks,
Logan
--
Logan Gunthorpe (6):
x86/mm: Thread pgprot_t through init_memory_mapping()
x86/mm: Introduce _set_memory_prot()
powerpc/mm: Thread pgprot_t through create_section_mapping()
s390/mm: Thread pgprot_t through vmem_add_mapping()
mm, memory_hotplug: Provide argument for the pgprot_t in
arch_add_memory()
mm/memremap: Set caching mode for PCI P2PDMA memory to WC
arch/arm64/mm/mmu.c | 4 +--
arch/ia64/mm/init.c | 5 +++-
arch/powerpc/include/asm/book3s/64/hash.h | 3 +-
arch/powerpc/include/asm/book3s/64/radix.h | 3 +-
arch/powerpc/include/asm/sparsemem.h | 3 +-
arch/powerpc/mm/book3s64/hash_utils.c | 5 ++--
arch/powerpc/mm/book3s64/pgtable.c | 7 +++--
arch/powerpc/mm/book3s64/radix_pgtable.c | 18 +++++++-----
arch/powerpc/mm/mem.c | 7 +++--
arch/s390/include/asm/pgtable.h | 3 +-
arch/s390/mm/extmem.c | 3 +-
arch/s390/mm/init.c | 4 +--
arch/s390/mm/vmem.c | 10 +++----
arch/sh/mm/init.c | 5 +++-
arch/x86/include/asm/page_types.h | 3 --
arch/x86/include/asm/pgtable.h | 3 ++
arch/x86/include/asm/set_memory.h | 1 +
arch/x86/kernel/amd_gart_64.c | 3 +-
arch/x86/mm/init.c | 9 +++---
arch/x86/mm/init_32.c | 10 +++++--
arch/x86/mm/init_64.c | 34 ++++++++++++----------
arch/x86/mm/mm_internal.h | 3 +-
arch/x86/mm/pageattr.c | 7 +++++
arch/x86/platform/efi/efi_64.c | 3 +-
include/linux/memory_hotplug.h | 2 +-
mm/memory_hotplug.c | 2 +-
mm/memremap.c | 5 +++-
27 files changed, 104 insertions(+), 61 deletions(-)
--
2.20.1
In preparation for adding a pgprot_t argument to arch_add_memory(),
thread a pgprot_t parameter through init_memory_mapping() and
kernel_physical_mapping_init(). The prototype of init_memory_mapping()
must also move from page_types.h to pgtable.h, because its original
location comes before the definition of pgprot_t.
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Logan Gunthorpe <[email protected]>
---
arch/x86/include/asm/page_types.h | 3 ---
arch/x86/include/asm/pgtable.h | 3 +++
arch/x86/kernel/amd_gart_64.c | 3 ++-
arch/x86/mm/init.c | 9 +++++----
arch/x86/mm/init_32.c | 3 ++-
arch/x86/mm/init_64.c | 32 +++++++++++++++++--------------
arch/x86/mm/mm_internal.h | 3 ++-
arch/x86/platform/efi/efi_64.c | 3 ++-
8 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c85e15010f48..bf7aa2e290ef 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -73,9 +73,6 @@ static inline phys_addr_t get_max_mapped(void)
bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
-
extern void initmem_init(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0bc530c4eb13..f1c3a3840edf 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1041,6 +1041,9 @@ static inline void __meminit init_trampoline_default(void)
void __init poking_init(void);
+unsigned long init_memory_mapping(unsigned long start,
+ unsigned long end, pgprot_t prot);
+
# ifdef CONFIG_RANDOMIZE_MEMORY
void __meminit init_trampoline(void);
# else
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index a6ac3712db8b..1decce1d9030 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -746,7 +746,8 @@ int __init gart_iommu_init(void)
start_pfn = PFN_DOWN(aper_base);
if (!pfn_range_is_mapped(start_pfn, end_pfn))
- init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT,
+ PAGE_KERNEL);
pr_info("PCI-DMA: using GART IOMMU.\n");
iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd10d91a6115..efa4517f9e3d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -467,7 +467,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
* the physical memory. To access them they are temporarily mapped.
*/
unsigned long __ref init_memory_mapping(unsigned long start,
- unsigned long end)
+ unsigned long end, pgprot_t prot)
{
struct map_range mr[NR_RANGE_MR];
unsigned long ret = 0;
@@ -481,7 +481,8 @@ unsigned long __ref init_memory_mapping(unsigned long start,
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
- mr[i].page_size_mask);
+ mr[i].page_size_mask,
+ prot);
add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
@@ -521,7 +522,7 @@ static unsigned long __init init_range_memory_mapping(
*/
can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
- init_memory_mapping(start, end);
+ init_memory_mapping(start, end, PAGE_KERNEL);
mapped_ram_size += end - start;
can_use_brk_pgt = true;
}
@@ -661,7 +662,7 @@ void __init init_mem_mapping(void)
#endif
/* the ISA range is always mapped regardless of memory holes */
- init_memory_mapping(0, ISA_END_ADDRESS);
+ init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL);
/* Init the trampoline, possibly with KASLR memory offset */
init_trampoline();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 930edeb41ec3..d3cdd9137f42 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -252,7 +252,8 @@ static inline int is_kernel_text(unsigned long addr)
unsigned long __init
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask,
+ pgprot_t prot)
{
int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
unsigned long last_map_addr = end;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a6b5c653727b..65a5093ec97b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -585,7 +585,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask, bool init)
+ unsigned long page_size_mask, pgprot_t _prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -595,7 +595,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
pud_t *pud;
pmd_t *pmd;
- pgprot_t prot = PAGE_KERNEL;
+ pgprot_t prot = _prot;
vaddr = (unsigned long)__va(paddr);
pud = pud_page + pud_index(vaddr);
@@ -644,9 +644,12 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
+
+ prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE);
+
set_pte_init((pte_t *)pud,
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE),
+ prot),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
@@ -669,7 +672,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask, bool init)
+ unsigned long page_size_mask, pgprot_t prot, bool init)
{
unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
@@ -679,7 +682,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
- page_size_mask, init);
+ page_size_mask, prot, init);
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
p4d_t *p4d = p4d_page + p4d_index(vaddr);
@@ -702,13 +705,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
- page_size_mask, init);
+ page_size_mask, prot, init);
continue;
}
pud = alloc_low_page();
paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
- page_size_mask, init);
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
p4d_populate_init(&init_mm, p4d, pud, init);
@@ -722,7 +725,7 @@ static unsigned long __meminit
__kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask,
- bool init)
+ pgprot_t prot, bool init)
{
bool pgd_changed = false;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -743,13 +746,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
paddr_last = phys_p4d_init(p4d, __pa(vaddr),
__pa(vaddr_end),
page_size_mask,
- init);
+ prot, init);
continue;
}
p4d = alloc_low_page();
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
- page_size_mask, init);
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled())
@@ -778,10 +781,10 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long __meminit
kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, pgprot_t prot)
{
return __kernel_physical_mapping_init(paddr_start, paddr_end,
- page_size_mask, true);
+ page_size_mask, prot, true);
}
/*
@@ -796,7 +799,8 @@ kernel_physical_mapping_change(unsigned long paddr_start,
unsigned long page_size_mask)
{
return __kernel_physical_mapping_init(paddr_start, paddr_end,
- page_size_mask, false);
+ page_size_mask, PAGE_KERNEL,
+ false);
}
#ifndef CONFIG_NUMA
@@ -864,7 +868,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- init_memory_mapping(start, start + size);
+ init_memory_mapping(start, start + size, PAGE_KERNEL);
return add_pages(nid, start_pfn, nr_pages, restrictions);
}
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index eeae142062ed..3f37b5c80bb3 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -12,7 +12,8 @@ void early_ioremap_page_table_range_init(void);
unsigned long kernel_physical_mapping_init(unsigned long start,
unsigned long end,
- unsigned long page_size_mask);
+ unsigned long page_size_mask,
+ pgprot_t prot);
unsigned long kernel_physical_mapping_change(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 08ce8177c3af..3d0acfdb58c3 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -499,7 +499,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
if (type == EFI_MEMORY_MAPPED_IO)
return ioremap(phys_addr, size);
- last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+ last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size,
+ PAGE_KERNEL);
if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
unsigned long top = last_map_pfn << PAGE_SHIFT;
efi_ioremap(top, size - (top - phys_addr), type, attribute);
--
2.20.1
In preparation for adding a pgprot_t argument to arch_add_memory(),
thread a pgprot_t parameter through vmem_add_mapping() on s390.
Cc: Heiko Carstens <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Signed-off-by: Logan Gunthorpe <[email protected]>
---
arch/s390/include/asm/pgtable.h | 3 ++-
arch/s390/mm/extmem.c | 3 ++-
arch/s390/mm/init.c | 2 +-
arch/s390/mm/vmem.c | 10 +++++-----
4 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ff98d76a66c..e0d5ed38282b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1673,7 +1673,8 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define kern_addr_valid(addr) (1)
-extern int vmem_add_mapping(unsigned long start, unsigned long size);
+extern int vmem_add_mapping(unsigned long start, unsigned long size,
+ pgprot_t prot);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index fd0dae9d10f4..6cf7029a7b35 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -313,7 +313,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
goto out_free;
}
- rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1,
+ PAGE_KERNEL);
if (rc)
goto out_free;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a124f19f7b3c..263ebb074cdd 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -276,7 +276,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
if (WARN_ON_ONCE(restrictions->altmap))
return -EINVAL;
- rc = vmem_add_mapping(start, size);
+ rc = vmem_add_mapping(start, size, PAGE_KERNEL);
if (rc)
return rc;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b403fa14847d..8a5e95f184a2 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -66,7 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
/*
* Add a physical memory range to the 1:1 mapping.
*/
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static int vmem_add_mem(unsigned long start, unsigned long size, pgprot_t prot)
{
unsigned long pgt_prot, sgt_prot, r3_prot;
unsigned long pages4k, pages1m, pages2g;
@@ -79,7 +79,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
pte_t *pt_dir;
int ret = -ENOMEM;
- pgt_prot = pgprot_val(PAGE_KERNEL);
+ pgt_prot = pgprot_val(prot);
sgt_prot = pgprot_val(SEGMENT_KERNEL);
r3_prot = pgprot_val(REGION3_KERNEL);
if (!MACHINE_HAS_NX) {
@@ -362,7 +362,7 @@ int vmem_remove_mapping(unsigned long start, unsigned long size)
return ret;
}
-int vmem_add_mapping(unsigned long start, unsigned long size)
+int vmem_add_mapping(unsigned long start, unsigned long size, pgprot_t prot)
{
struct memory_segment *seg;
int ret;
@@ -379,7 +379,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
if (ret)
goto out_free;
- ret = vmem_add_mem(start, size);
+ ret = vmem_add_mem(start, size, prot);
if (ret)
goto out_remove;
goto out;
@@ -403,7 +403,7 @@ void __init vmem_map_init(void)
struct memblock_region *reg;
for_each_memblock(memory, reg)
- vmem_add_mem(reg->base, reg->size);
+ vmem_add_mem(reg->base, reg->size, PAGE_KERNEL);
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
--
2.20.1
Introduce _set_memory_prot() for use by the 32-bit arch_add_memory()
to set the pgprot of the memory being added.
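For example, the 32-bit arch_add_memory() (modified later in this
series) can then apply a non-default protection roughly as follows (a
sketch; the exact call site lands in a later patch, and the use of
__va(start) here is an assumption):

	/*
	 * The 32-bit direct map page tables are created at boot, so a
	 * non-default caching mode has to be applied to the existing
	 * entries instead of being threaded through the setup code.
	 */
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
		ret = _set_memory_prot((unsigned long)__va(start),
				       size >> PAGE_SHIFT, prot);
		if (ret)
			return ret;
	}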
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Logan Gunthorpe <[email protected]>
---
arch/x86/include/asm/set_memory.h | 1 +
arch/x86/mm/pageattr.c | 7 +++++++
2 files changed, 8 insertions(+)
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 2ee8e469dcf5..d728c2f3ad96 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -34,6 +34,7 @@
* The caller is required to take care of these.
*/
+int _set_memory_prot(unsigned long addr, int numpages, pgprot_t prot);
int _set_memory_uc(unsigned long addr, int numpages);
int _set_memory_wc(unsigned long addr, int numpages);
int _set_memory_wt(unsigned long addr, int numpages);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0d09cc5aad61..024e3ddf494d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1781,6 +1781,13 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages,
CPA_PAGES_ARRAY, pages);
}
+int _set_memory_prot(unsigned long addr, int numpages, pgprot_t prot)
+{
+ return change_page_attr_set_clr(&addr, numpages, prot,
+ __pgprot(~pgprot_val(prot)), 0, 0,
+ NULL);
+}
+
int _set_memory_uc(unsigned long addr, int numpages)
{
/*
--
2.20.1
On Mon, Dec 09, 2019 at 12:13:40PM -0700, Logan Gunthorpe wrote:
> This change is fairly routine for most of the arches: x86_64, s390,
> arm64 and powerpc simply need to thread the pgprot through to where
> the page tables are set up. x86_32 unfortunately sets up its page
> tables at boot, so it must use _set_memory_prot() to change their
> caching mode. ia64 and sh don't appear to have an easy way to change
> the page tables, so, for now at least, we just return -EINVAL on such
> mappings; they will not support P2PDMA memory until that work is done.
ia64 and sh don't support ZONE_DEVICE mappings anyway as far as I know.
This generally looks fine to me.