Hi all,
this series replaces the arch_dma_mmap_pgprot hooks with the
simpler pgprot_dmacoherent macro, as already used by the arm code,
and cleans up various bits around that area.
I'd still like to hear confirmation from the MIPS folks on how
the write combine attribute can or can't work with the KSEG1
uncached segment.
MIPS uses the KSEG1 kernel memory segment to map dma coherent
allocations for non-coherent devices as uncacheable, and does not have
any kind of special support for DMA_ATTR_WRITE_COMBINE in the allocation
path. Thus supporting DMA_ATTR_WRITE_COMBINE in dma_mmap_attrs will
lead to multiple mappings of the same memory with different caching attributes.
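Concretely, the removed hook (quoted from the diff below, with comments
added for illustration) would return a write combine protection for the
userspace mapping while the kernel keeps accessing the buffer through the
uncached KSEG1 alias:

  pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
                  unsigned long attrs)
  {
          if (attrs & DMA_ATTR_WRITE_COMBINE)
                  return pgprot_writecombine(prot); /* second mapping, different CCA */
          return pgprot_noncached(prot);            /* matches the uncached KSEG1 view */
  }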
Fixes: 8c172467be36 ("MIPS: Add implementation of dma_map_ops.mmap()")
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/mips/Kconfig | 1 -
arch/mips/mm/dma-noncoherent.c | 8 --------
2 files changed, 9 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d50fafd7bf3a..86e6760ef0d0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1119,7 +1119,6 @@ config DMA_PERDEV_COHERENT
config DMA_NONCOHERENT
bool
- select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_UNCACHED_SEGMENT
select NEED_DMA_MAP_STATE
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index ed56c6fa7be2..1d4d57dd9acf 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -65,14 +65,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
}
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
- unsigned long attrs)
-{
- if (attrs & DMA_ATTR_WRITE_COMBINE)
- return pgprot_writecombine(prot);
- return pgprot_noncached(prot);
-}
-
static inline void dma_sync_virt(void *addr, size_t size,
enum dma_data_direction dir)
{
--
2.20.1
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm/include/asm/pgtable-nommu.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 0b1f6799a32e..d0de24f06724 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -62,7 +62,6 @@ typedef pte_t *pte_addr_t;
*/
#define pgprot_noncached(prot) (prot)
#define pgprot_writecombine(prot) (prot)
-#define pgprot_dmacoherent(prot) (prot)
#define pgprot_device(prot) (prot)
--
2.20.1
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/unicore32/include/asm/pgtable.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 9492aa304f03..126e961a8cb0 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -198,8 +198,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
#define pgprot_writecombine(prot) \
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
-#define pgprot_dmacoherent(prot) \
- __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_present(pmd) (pmd_val(pmd) & PMD_PRESENT)
--
2.20.1
arch_dma_mmap_pgprot is used for two things:
1) to override the "normal" uncached page attributes for mapping
memory coherent to devices that can't snoop the CPU caches
2) to provide the special DMA_ATTR_WRITE_COMBINE semantics on older
arm systems
Replace the first use with the pgprot_dmacoherent macro, which is already
provided by arm and much simpler to use, and lift the DMA_ATTR_WRITE_COMBINE
handling into common code with an explicit arch opt-in.
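To make the end result concrete, here is a condensed sketch of what the
common code ends up doing (trimmed from the include/linux/dma-noncoherent.h
and kernel/dma/mapping.c hunks below):

  /* include/linux/dma-noncoherent.h: fallback for architectures that do
   * not provide their own pgprot_dmacoherent */
  #ifndef pgprot_dmacoherent
  #define pgprot_dmacoherent(prot)        pgprot_noncached(prot)
  #endif

  /* kernel/dma/mapping.c: tail of dma_pgprot() */
  #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
          if (attrs & DMA_ATTR_WRITE_COMBINE)
                  return pgprot_writecombine(prot);
  #endif
          return pgprot_dmacoherent(prot);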
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm/Kconfig | 1 +
arch/arm/mm/Kconfig | 1 -
arch/arm/mm/dma-mapping.c | 6 ------
arch/arm64/Kconfig | 1 -
arch/arm64/include/asm/pgtable.h | 4 ++++
arch/arm64/mm/dma-mapping.c | 6 ------
arch/m68k/Kconfig | 1 -
arch/m68k/include/asm/pgtable_mm.h | 3 +++
arch/m68k/kernel/dma.c | 3 +--
include/linux/dma-noncoherent.h | 13 +++++++++++--
kernel/dma/Kconfig | 14 +++++++++++---
kernel/dma/mapping.c | 8 +++++---
12 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 33b00579beff..e172fba1e8fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,7 @@ config ARM
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c54cd7ed90ba..0609c9e2191b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -665,7 +665,6 @@ config ARM_LPAE
select PHYS_ADDR_T_64BIT
select SWIOTLB
select ARCH_HAS_DMA_COHERENT_TO_PFN
- select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
help
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d42557ee69c2..d27b12f61737 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2402,12 +2402,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return dma_to_pfn(dev, dma_addr);
}
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
- unsigned long attrs)
-{
- return __get_dma_pgprot(attrs, prot);
-}
-
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3adcec05b1f6..dab9dda34206 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,7 +13,6 @@ config ARM64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_COHERENT_TO_PFN
- select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e09760ece844..6700371227d1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -435,6 +435,10 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define pgprot_dmacoherent(prot) \
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, \
+ PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bd2b039f43a6..676efcda51e6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -11,12 +11,6 @@
#include <asm/cacheflush.h>
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
- unsigned long attrs)
-{
- return pgprot_writecombine(prot);
-}
-
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index c518d695c376..a9e564306d3e 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,7 +4,6 @@ config M68K
default y
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT
- select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index fe3ddd73a0cc..fde4534b974f 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -169,6 +169,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S)) \
: (prot)))
+pgprot_t pgprot_dmacoherent(pgprot_t prot);
+#define pgprot_dmacoherent(prot) pgprot_dmacoherent(prot)
+
#endif /* CONFIG_COLDFIRE */
#include <asm-generic/pgtable.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 30cd59caf037..35064150e348 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -23,8 +23,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
cache_push(page_to_phys(page), size);
}
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
- unsigned long attrs)
+pgprot_t pgprot_dmacoherent(pgprot_t prot)
{
if (CPU_IS_040_OR_060) {
pgprot_val(prot) &= ~_PAGE_CACHE040;
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 0bff3d7fac92..dd3de6d88fc0 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -3,6 +3,7 @@
#define _LINUX_DMA_NONCOHERENT_H 1
#include <linux/dma-mapping.h>
+#include <asm/pgtable.h>
#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H
#include <asm/dma-coherence.h>
@@ -42,10 +43,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
dma_addr_t dma_addr);
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
- unsigned long attrs);
#ifdef CONFIG_MMU
+/*
+ * Page protection so that devices that can't snoop CPU caches can use the
+ * memory coherently. We default to pgprot_noncached which is usually used
+ * for ioremap as a safe bet, but architectures can override this with less
+ * strict semantics if possible.
+ */
+#ifndef pgprot_dmacoherent
+#define pgprot_dmacoherent(prot) pgprot_noncached(prot)
+#endif
+
pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs);
#else
static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 9decbba255fc..49148c207563 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -20,6 +20,17 @@ config ARCH_HAS_DMA_COHERENCE_H
config ARCH_HAS_DMA_SET_MASK
bool
+#
+# Select this option if the architecture needs special handling for
+# DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be
+# what people think of when saying write combine, but on old arm
+# platforms the write combine semantics are not well defined and thus
+# not enabled by default. You probably do not want to enable this for
+# any new port.
+#
+config ARCH_HAS_DMA_WRITE_COMBINE
+ bool
+
config DMA_DECLARE_COHERENT
bool
@@ -45,9 +56,6 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_DMA_COHERENT_TO_PFN
bool
-config ARCH_HAS_DMA_MMAP_PGPROT
- bool
-
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b0038ca3aa92..1b96616c9f20 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -161,9 +161,11 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
(IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
(attrs & DMA_ATTR_NON_CONSISTENT)))
return prot;
- if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT))
- return arch_dma_mmap_pgprot(dev, prot, attrs);
- return pgprot_noncached(prot);
+#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
+ if (attrs & DMA_ATTR_WRITE_COMBINE)
+ return pgprot_writecombine(prot);
+#endif
+ return pgprot_dmacoherent(prot);
}
#endif /* CONFIG_MMU */
--
2.20.1
The memory allocated for the atomic pool needs to have the same
mapping attributes that we use for remapping, so use
pgprot_dmacoherent instead of open coding it. Also deduce a
suitable zone to allocate the memory from based on the presence
of the DMA zones.
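That is, the pool is now allocated and remapped roughly as follows
(condensed from the kernel/dma/remap.c hunk below):

  /* pick the most restrictive DMA zone that is actually configured */
  static gfp_t dma_atomic_pool_gfp(void)
  {
          if (IS_ENABLED(CONFIG_ZONE_DMA))
                  return GFP_DMA;
          if (IS_ENABLED(CONFIG_ZONE_DMA32))
                  return GFP_DMA32;
          return GFP_KERNEL;
  }

  /* ... and the pool is remapped with the same attributes used for
   * coherent allocations */
  addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
                          pgprot_dmacoherent(PAGE_KERNEL),
                          __builtin_return_address(0));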
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arc/mm/dma.c | 6 ------
arch/arm64/mm/dma-mapping.c | 6 ------
arch/csky/mm/dma-mapping.c | 6 ------
arch/nds32/kernel/dma.c | 6 ------
include/linux/dma-mapping.h | 1 -
kernel/dma/remap.c | 17 ++++++++++++++---
6 files changed, 14 insertions(+), 28 deletions(-)
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 62c210e7ee4c..ff4a5752f8cc 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -104,9 +104,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev_info(dev, "use %sncoherent DMA ops\n",
dev->dma_coherent ? "" : "non");
}
-
-static int __init atomic_pool_init(void)
-{
- return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 676efcda51e6..a1d05f669f67 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -28,12 +28,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
__dma_flush_area(page_address(page), size);
}
-static int __init arm64_dma_init(void)
-{
- return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
-}
-arch_initcall(arm64_dma_init);
-
#ifdef CONFIG_IOMMU_DMA
void arch_teardown_dma_ops(struct device *dev)
{
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 80783bb71c5c..602a60d47a94 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,12 +14,6 @@
#include <linux/version.h>
#include <asm/cache.h>
-static int __init atomic_pool_init(void)
-{
- return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
-
void arch_dma_prep_coherent(struct page *page, size_t size)
{
if (PageHighMem(page)) {
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index 490e3720d694..4206d4b6c8ce 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -80,9 +80,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
{
cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
}
-
-static int __init atomic_pool_init(void)
-{
- return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea32c78..48ebe8295987 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -624,7 +624,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
bool dma_in_atomic_pool(void *start, size_t size);
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
bool dma_free_from_pool(void *start, size_t size);
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index ffe78f0b2fe4..838123f79639 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -105,7 +105,16 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
+static gfp_t dma_atomic_pool_gfp(void)
+{
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ return GFP_DMA;
+ if (IS_ENABLED(CONFIG_ZONE_DMA32))
+ return GFP_DMA32;
+ return GFP_KERNEL;
+}
+
+static int __init dma_atomic_pool_init(void)
{
unsigned int pool_size_order = get_order(atomic_pool_size);
unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
@@ -117,7 +126,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
page = dma_alloc_from_contiguous(NULL, nr_pages,
pool_size_order, false);
else
- page = alloc_pages(gfp, pool_size_order);
+ page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
if (!page)
goto out;
@@ -128,7 +137,8 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
goto free_page;
addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
- prot, __builtin_return_address(0));
+ pgprot_dmacoherent(PAGE_KERNEL),
+ __builtin_return_address(0));
if (!addr)
goto destroy_genpool;
@@ -155,6 +165,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
atomic_pool_size / 1024);
return -ENOMEM;
}
+postcore_initcall(dma_atomic_pool_init);
bool dma_in_atomic_pool(void *start, size_t size)
{
--
2.20.1
Based on an email from Will Deacon.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm64/include/asm/pgtable.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6700371227d1..6ff221d9a631 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -435,6 +435,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+/*
+ * DMA allocations for non-coherent devices use what the Arm architecture calls
+ * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
+ * and merging of writes. This is different from "Strongly Ordered" memory
+ * which is intended for MMIO and thus forbids speculation, preserves access
+ * size, requires strict alignment and also forces write responses to come from
+ * the endpoint.
+ */
#define pgprot_dmacoherent(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
--
2.20.1
On Fri, Aug 16, 2019 at 9:19 AM Christoph Hellwig <[email protected]> wrote:
> arch_dma_mmap_pgprot is used for two things:
>
> 1) to override the "normal" uncached page attributes for mapping
> memory coherent to devices that can't snoop the CPU caches
> 2) to provide the special DMA_ATTR_WRITE_COMBINE semantics on older
> arm systems
>
> Replace one with the pgprot_dmacoherent macro that is already provided
> by arm and much simpler to use, and lift the DMA_ATTR_WRITE_COMBINE
> handling to common code with an explicit arch opt-in.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> arch/m68k/Kconfig | 1 -
> arch/m68k/include/asm/pgtable_mm.h | 3 +++
> arch/m68k/kernel/dma.c | 3 +--
Acked-by: Geert Uytterhoeven <[email protected]>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
Hi Christoph,
Thanks for spinning this into a patch.
On Fri, Aug 16, 2019 at 09:07:54AM +0200, Christoph Hellwig wrote:
> Based on an email from Will Deacon.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/arm64/include/asm/pgtable.h | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 6700371227d1..6ff221d9a631 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -435,6 +435,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
> #define pgprot_device(prot) \
> __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
> +/*
> + * DMA allocations for non-coherent devices use what the Arm architecture calls
> + * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
> + * and merging of writes. This is different from "Strongly Ordered" memory
> + * which is intended for MMIO and thus forbids speculation, preserves access
> + * size, requires strict alignment and also forces write responses to come from
> + * the endpoint.
> + */
Mind if I tweak the second sentence to be:
This is different from "Device-nGnR[nE]" memory which is intended for MMIO
and thus forbids speculation, preserves access size, requires strict
alignment and can also force write responses to come from the endpoint.
? It's a small change, but it better fits with the arm64 terminology
("strongly ordered" is no longer used in the architecture).
If you're happy with that, I can make the change and queue this patch
for 5.4.
Thanks,
Will
On Fri, Aug 16, 2019 at 06:31:18PM +0100, Will Deacon wrote:
> Hi Christoph,
>
> Thanks for spinning this into a patch.
>
> On Fri, Aug 16, 2019 at 09:07:54AM +0200, Christoph Hellwig wrote:
> > Based on an email from Will Deacon.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
> > ---
> > arch/arm64/include/asm/pgtable.h | 8 ++++++++
> > 1 file changed, 8 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index 6700371227d1..6ff221d9a631 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -435,6 +435,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> > __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
> > #define pgprot_device(prot) \
> > __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
> > +/*
> > + * DMA allocations for non-coherent devices use what the Arm architecture calls
> > + * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
> > + * and merging of writes. This is different from "Strongly Ordered" memory
> > + * which is intended for MMIO and thus forbids speculation, preserves access
> > + * size, requires strict alignment and also forces write responses to come from
> > + * the endpoint.
> > + */
>
> Mind if I tweak the second sentence to be:
>
> This is different from "Device-nGnR[nE]" memory which is intended for MMIO
> and thus forbids speculation, preserves access size, requires strict
> alignment and can also force write responses to come from the endpoint.
>
> ? It's a small change, but it better fits with the arm64 terminology
> ("strongly ordered" is no longer used in the architecture).
>
> If you're happy with that, I can make the change and queue this patch
> for 5.4.
FWIW, with that wording:
Acked-by: Mark Rutland <[email protected]>
Mark.
On Fri, Aug 16, 2019 at 06:31:18PM +0100, Will Deacon wrote:
> Mind if I tweak the second sentence to be:
>
> This is different from "Device-nGnR[nE]" memory which is intended for MMIO
> and thus forbids speculation, preserves access size, requires strict
> alignment and can also force write responses to come from the endpoint.
>
> ? It's a small change, but it better fits with the arm64 terminology
> ("strongly ordered" is no longer used in the architecture).
>
> If you're happy with that, I can make the change and queue this patch
> for 5.4.
I'm fine with the change, but you really need this series as a base,
as there is no pgprot_dmacoherent on arm64 before this series. So I
think I'll have to queue it up if we want it for 5.4, and I'll need a
few more reviews for the other patches in this series first.
On Fri, Aug 16, 2019 at 07:59:42PM +0200, Christoph Hellwig wrote:
> On Fri, Aug 16, 2019 at 06:31:18PM +0100, Will Deacon wrote:
> > Mind if I tweak the second sentence to be:
> >
> > This is different from "Device-nGnR[nE]" memory which is intended for MMIO
> > and thus forbids speculation, preserves access size, requires strict
> > alignment and can also force write responses to come from the endpoint.
> >
> > ? It's a small change, but it better fits with the arm64 terminology
> > ("strongly ordered" is no longer used in the architecture).
> >
> > If you're happy with that, I can make the change and queue this patch
> > for 5.4.
>
> I'm fine with the change, but you really need this series as base,
> as there is no pgprot_dmacoherent before the series. So I think I'll
> have to queue it up if we want it for 5.4, and I'll need a few more
> reviews for the other patches in this series first.
Ah, I didn't think about the contextual stuff. In which case, with my
change in wording:
Acked-by: Will Deacon <[email protected]>
and feel free to route it with the rest.
Thanks,
Will
Hi Christoph,
On Fri, Aug 16, 2019 at 09:07:48AM +0200, Christoph Hellwig wrote:
> I'd still like to hear confirmation from the MIPS folks on how
> the write combine attribute can or can't work with the KSEG1
> uncached segment.
Quoting section 4.8 "Cacheability and Coherency Attributes and Access
Types" of "MIPS Architecture Volume 1: Introduction to the MIPS32
Architecture" (MD00080, revision 6.01):
https://www.mips.com/?do-download=introduction-to-the-mips32-architecture-v6-01
> Memory access types are specified by architecturally-defined and
> implementation-specific Cacheability and Coherency Attribute bits
> (CCAs) generated by the MMU for the access.
>
> Slightly different cacheability and coherency attributes such as
> “cached coherent, update on write” and “cached coherent, exclusive on
> write” can map to the same memory access type; in this case they both
> map to cached coherent. In order to map to the same access type, the
> fundamental mechanisms of both CCAs must be the same.
>
> When the operation of the instruction is affected, the instructions
> are described in terms of memory access types. The load and store
> operations in a processor proceed according to the specific CCA of the
> reference, however, and the pseudocode for load and store common
> functions uses the CCA value rather than the corresponding memory
> access type.
So I believe uncached & uncached accelerated are another case like that
described above - they're 2 different CCAs but the same "access type",
namely uncached.
Section 4.9 then goes on to forbid mixing access types, but not CCAs.
It would be nice if the precise mapping from CCA to access type was
provided, but I don't see that anywhere. I can check with the
architecture team to be sure, but to my knowledge we're fine to mix
access via kseg1 (ie. uncached) & mappings with CCA=7 (uncached
accelerated).
Thanks,
Paul
On Fri, Aug 23, 2019 at 09:58:04PM +0000, Paul Burton wrote:
> So I believe uncached & uncached accelerated are another case like that
> described above - they're 2 different CCAs but the same "access type",
> namely uncached.
>
> Section 4.9 then goes on to forbid mixing access types, but not CCAs.
>
> It would be nice if the precise mapping from CCA to access type was
> provided, but I don't see that anywhere. I can check with the
> architecture team to be sure, but to my knowledge we're fine to mix
> access via kseg1 (ie. uncached) & mappings with CCA=7 (uncached
> accelerated).
Ok. Looks like we can keep it then and I'll add a comment to the
code with the above reference.
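Something along these lines, perhaps (just a suggested wording for that
comment, not the final text):

  /*
   * The CPU accesses dma coherent allocations through the uncached KSEG1
   * alias.  Per the MIPS32 Architecture Volume 1 (MD00080, revision 6.01),
   * sections 4.8/4.9, only mixing different access types is forbidden;
   * uncached (KSEG1) and uncached accelerated (CCA=7, write combine) are
   * different CCAs of the same uncached access type, so a write combine
   * userspace mapping may coexist with the KSEG1 view.
   */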