Christoph, Thomas, is something like this (without the diagnostic
information included in this patch) acceptable for these allocations?
Adding expansion support when the pool is half depleted wouldn't be *that*
hard.
Or are there alternatives we should consider? Thanks!
When AMD SEV is enabled in the guest, all allocations through
dma_pool_alloc_page() must call set_memory_decrypted() for unencrypted
DMA. This includes dma_pool_alloc() and dma_direct_alloc_pages(). These
calls may block which is not allowed in atomic allocation contexts such as
from the NVMe driver.
Preallocate a complementary unencrypted DMA atomic pool that is initially
4MB in size. This patch does not contain dynamic expansion, but that
could be added if necessary.
In our stress testing, our peak unencrypted DMA atomic allocation
requirement is ~1.4MB, so 4MB is plenty. This pool is similar to the
existing DMA atomic pool but is unencrypted.
Signed-off-by: David Rientjes <[email protected]>
---
Based on v5.4 HEAD.
This commit contains diagnostic information and is not intended for use
in a production environment.
arch/x86/Kconfig | 1 +
drivers/iommu/dma-iommu.c | 5 +-
include/linux/dma-mapping.h | 7 ++-
kernel/dma/direct.c | 16 ++++-
kernel/dma/remap.c | 116 ++++++++++++++++++++++++++----------
5 files changed, 108 insertions(+), 37 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1530,6 +1530,7 @@ config X86_CPA_STATISTICS
config AMD_MEM_ENCRYPT
bool "AMD Secure Memory Encryption (SME) support"
depends on X86_64 && CPU_SUP_AMD
+ select DMA_DIRECT_REMAP
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -928,7 +928,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
/* Non-coherent atomic allocation? Easy */
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_free_from_pool(cpu_addr, alloc_size))
+ dma_free_from_pool(dev, cpu_addr, alloc_size))
return;
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
@@ -1011,7 +1011,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
!gfpflags_allow_blocking(gfp) && !coherent)
- cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
+ cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
+ gfp);
else
cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
if (!cpu_addr)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -629,9 +629,10 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size);
-bool dma_in_atomic_pool(void *start, size_t size);
-void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
-bool dma_free_from_pool(void *start, size_t size);
+bool dma_in_atomic_pool(struct device *dev, void *start, size_t size);
+void *dma_alloc_from_pool(struct device *dev, size_t size,
+ struct page **ret_page, gfp_t flags);
+bool dma_free_from_pool(struct device *dev, void *start, size_t size);
int
dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -10,6 +10,7 @@
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
+#include <linux/dma-mapping.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
@@ -131,6 +132,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
struct page *page;
void *ret;
+ if (!gfpflags_allow_blocking(gfp) && force_dma_unencrypted(dev)) {
+ ret = dma_alloc_from_pool(dev, size, &page, gfp);
+ if (!ret)
+ return NULL;
+ goto done;
+ }
+
page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
if (!page)
return NULL;
@@ -156,7 +164,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
__dma_direct_free_pages(dev, size, page);
return NULL;
}
-
+done:
ret = page_address(page);
if (force_dma_unencrypted(dev)) {
set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
@@ -185,6 +193,12 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
{
unsigned int page_order = get_order(size);
+ if (force_dma_unencrypted(dev) &&
+ dma_in_atomic_pool(dev, cpu_addr, size)) {
+ dma_free_from_pool(dev, cpu_addr, size);
+ return;
+ }
+
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -8,6 +8,7 @@
#include <linux/dma-contiguous.h>
#include <linux/init.h>
#include <linux/genalloc.h>
+#include <linux/set_memory.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -100,9 +101,11 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
#ifdef CONFIG_DMA_DIRECT_REMAP
static struct gen_pool *atomic_pool __ro_after_init;
+static struct gen_pool *atomic_pool_unencrypted __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+static size_t atomic_pool_unencrypted_size __initdata = SZ_4M;
static int __init early_coherent_pool(char *p)
{
@@ -120,10 +123,11 @@ static gfp_t dma_atomic_pool_gfp(void)
return GFP_KERNEL;
}
-static int __init dma_atomic_pool_init(void)
+static int __init __dma_atomic_pool_init(struct gen_pool **pool,
+ size_t pool_size, bool unencrypt)
{
- unsigned int pool_size_order = get_order(atomic_pool_size);
- unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+ unsigned int pool_size_order = get_order(pool_size);
+ unsigned long nr_pages = pool_size >> PAGE_SHIFT;
struct page *page;
void *addr;
int ret;
@@ -136,78 +140,128 @@ static int __init dma_atomic_pool_init(void)
if (!page)
goto out;
- arch_dma_prep_coherent(page, atomic_pool_size);
+ arch_dma_prep_coherent(page, pool_size);
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
+ *pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!*pool)
goto free_page;
- addr = dma_common_contiguous_remap(page, atomic_pool_size,
+ addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
goto destroy_genpool;
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
- page_to_phys(page), atomic_pool_size, -1);
+ ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
+ pool_size, -1);
if (ret)
goto remove_mapping;
- gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
+ gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
+ if (unencrypt)
+ set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
- pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
- atomic_pool_size / 1024);
+ pr_info("DMA: preallocated %zu KiB pool for atomic allocations%s\n",
+ pool_size >> 10, unencrypt ? " (unencrypted)" : "");
return 0;
remove_mapping:
- dma_common_free_remap(addr, atomic_pool_size);
+ dma_common_free_remap(addr, pool_size);
destroy_genpool:
- gen_pool_destroy(atomic_pool);
- atomic_pool = NULL;
+ gen_pool_destroy(*pool);
+ *pool = NULL;
free_page:
if (!dma_release_from_contiguous(NULL, page, nr_pages))
__free_pages(page, pool_size_order);
out:
- pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
- atomic_pool_size / 1024);
+ pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation%s\n",
+ pool_size >> 10, unencrypt ? " (unencrypted)" : "");
return -ENOMEM;
}
+
+static int __init dma_atomic_pool_init(void)
+{
+ int ret;
+
+ ret = __dma_atomic_pool_init(&atomic_pool, atomic_pool_size, false);
+ if (ret)
+ return ret;
+ return __dma_atomic_pool_init(&atomic_pool_unencrypted,
+ atomic_pool_unencrypted_size, true);
+}
postcore_initcall(dma_atomic_pool_init);
-bool dma_in_atomic_pool(void *start, size_t size)
+static inline struct gen_pool *dev_to_pool(struct device *dev)
{
- if (unlikely(!atomic_pool))
- return false;
+ if (force_dma_unencrypted(dev))
+ return atomic_pool_unencrypted;
+ return atomic_pool;
+}
+
+bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
+{
+ struct gen_pool *pool = dev_to_pool(dev);
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+ if (unlikely(!pool))
+ return false;
+ return addr_in_gen_pool(pool, (unsigned long)start, size);
}
-void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
+static struct gen_pool *atomic_pool __ro_after_init;
+static size_t encrypted_pool_size;
+static size_t encrypted_pool_size_max;
+static spinlock_t encrypted_pool_size_lock;
+
+void *dma_alloc_from_pool(struct device *dev, size_t size,
+ struct page **ret_page, gfp_t flags)
{
+ struct gen_pool *pool = dev_to_pool(dev);
unsigned long val;
void *ptr = NULL;
- if (!atomic_pool) {
- WARN(1, "coherent pool not initialised!\n");
+ if (!pool) {
+ WARN(1, "%scoherent pool not initialised!\n",
+ force_dma_unencrypted(dev) ? "encrypted " : "");
return NULL;
}
- val = gen_pool_alloc(atomic_pool, size);
+ val = gen_pool_alloc(pool, size);
if (val) {
- phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+ phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
*ret_page = pfn_to_page(__phys_to_pfn(phys));
ptr = (void *)val;
memset(ptr, 0, size);
+ if (force_dma_unencrypted(dev)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&encrypted_pool_size_lock, flags);
+ encrypted_pool_size += size;
+ if (encrypted_pool_size > encrypted_pool_size_max) {
+ encrypted_pool_size_max = encrypted_pool_size;
+ pr_info("max encrypted pool size now %lu\n",
+ encrypted_pool_size_max);
+ }
+ spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
+ }
}
return ptr;
}
-bool dma_free_from_pool(void *start, size_t size)
+bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{
- if (!dma_in_atomic_pool(start, size))
+ struct gen_pool *pool = dev_to_pool(dev);
+
+ if (!dma_in_atomic_pool(dev, start, size))
return false;
- gen_pool_free(atomic_pool, (unsigned long)start, size);
+ gen_pool_free(pool, (unsigned long)start, size);
+ if (force_dma_unencrypted(dev)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&encrypted_pool_size_lock, flags);
+ encrypted_pool_size -= size;
+ spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
+ }
return true;
}
@@ -220,7 +274,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
size = PAGE_ALIGN(size);
if (!gfpflags_allow_blocking(flags)) {
- ret = dma_alloc_from_pool(size, &page, flags);
+ ret = dma_alloc_from_pool(dev, size, &page, flags);
if (!ret)
return NULL;
goto done;
@@ -251,7 +305,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+ if (!dma_free_from_pool(dev, vaddr, PAGE_ALIGN(size))) {
phys_addr_t phys = dma_to_phys(dev, dma_handle);
struct page *page = pfn_to_page(__phys_to_pfn(phys));
On 01/01/2020 1:54 am, David Rientjes via iommu wrote:
> Christoph, Thomas, is something like this (without the diagnostic
> information included in this patch) acceptable for these allocations?
> Adding expansion support when the pool is half depleted wouldn't be *that*
> hard.
>
> Or are there alternatives we should consider? Thanks!
Are there any platforms which require both non-cacheable remapping *and*
unencrypted remapping for distinct subsets of devices?
If not (and I'm assuming there aren't, because otherwise this patch is
incomplete in covering only 2 of the 3 possible combinations), then
couldn't we keep things simpler by just attributing both properties to
the single "atomic pool" on the basis that one or the other will always
be a no-op? In other words, basically just tweaking the existing
"!coherent" tests to "!coherent || force_dma_unencrypted()" and doing
set_dma_unencrypted() unconditionally in atomic_pool_init().
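For illustration, a minimal sketch of that kind of tweak (not a tested
patch; it assumes force_dma_unencrypted() is visible at these call sites):

	/* e.g. in iommu_dma_alloc(): use the atomic pool whenever the
	 * allocation is non-blocking and either non-coherent or must be
	 * unencrypted */
	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    !gfpflags_allow_blocking(gfp) &&
	    (!coherent || force_dma_unencrypted(dev)))
		cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);

	/* ... and in dma_atomic_pool_init(), decrypt the pool unconditionally;
	 * set_memory_decrypted() is a no-op when memory encryption is off */
	set_memory_decrypted((unsigned long)page_to_virt(page),
			     atomic_pool_size >> PAGE_SHIFT);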
Robin.
> When AMD SEV is enabled in the guest, all allocations through
> dma_pool_alloc_page() must call set_memory_decrypted() for unencrypted
> DMA. This includes dma_pool_alloc() and dma_direct_alloc_pages(). These
> calls may block which is not allowed in atomic allocation contexts such as
> from the NVMe driver.
>
> Preallocate a complementary unencrypted DMA atomic pool that is initially
> 4MB in size. This patch does not contain dynamic expansion, but that
> could be added if necessary.
>
> In our stress testing, our peak unencrypted DMA atomic allocation
> requirement is ~1.4MB, so 4MB is plenty. This pool is similar to the
> existing DMA atomic pool but is unencrypted.
>
> Signed-off-by: David Rientjes <[email protected]>
> ---
> Based on v5.4 HEAD.
>
> This commit contains diagnostic information and is not intended for use
> in a production environment.
>
> arch/x86/Kconfig | 1 +
> drivers/iommu/dma-iommu.c | 5 +-
> include/linux/dma-mapping.h | 7 ++-
> kernel/dma/direct.c | 16 ++++-
> kernel/dma/remap.c | 116 ++++++++++++++++++++++++++----------
> 5 files changed, 108 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1530,6 +1530,7 @@ config X86_CPA_STATISTICS
> config AMD_MEM_ENCRYPT
> bool "AMD Secure Memory Encryption (SME) support"
> depends on X86_64 && CPU_SUP_AMD
> + select DMA_DIRECT_REMAP
> select DYNAMIC_PHYSICAL_MASK
> select ARCH_USE_MEMREMAP_PROT
> select ARCH_HAS_FORCE_DMA_UNENCRYPTED
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -928,7 +928,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
>
> /* Non-coherent atomic allocation? Easy */
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> - dma_free_from_pool(cpu_addr, alloc_size))
> + dma_free_from_pool(dev, cpu_addr, alloc_size))
> return;
>
> if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
> @@ -1011,7 +1011,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
>
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> !gfpflags_allow_blocking(gfp) && !coherent)
> - cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
> + cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
> + gfp);
> else
> cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
> if (!cpu_addr)
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -629,9 +629,10 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
> pgprot_t prot, const void *caller);
> void dma_common_free_remap(void *cpu_addr, size_t size);
>
> -bool dma_in_atomic_pool(void *start, size_t size);
> -void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
> -bool dma_free_from_pool(void *start, size_t size);
> +bool dma_in_atomic_pool(struct device *dev, void *start, size_t size);
> +void *dma_alloc_from_pool(struct device *dev, size_t size,
> + struct page **ret_page, gfp_t flags);
> +bool dma_free_from_pool(struct device *dev, void *start, size_t size);
>
> int
> dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -10,6 +10,7 @@
> #include <linux/dma-direct.h>
> #include <linux/scatterlist.h>
> #include <linux/dma-contiguous.h>
> +#include <linux/dma-mapping.h>
> #include <linux/dma-noncoherent.h>
> #include <linux/pfn.h>
> #include <linux/set_memory.h>
> @@ -131,6 +132,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> struct page *page;
> void *ret;
>
> + if (!gfpflags_allow_blocking(gfp) && force_dma_unencrypted(dev)) {
> + ret = dma_alloc_from_pool(dev, size, &page, gfp);
> + if (!ret)
> + return NULL;
> + goto done;
> + }
> +
> page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
> if (!page)
> return NULL;
> @@ -156,7 +164,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> __dma_direct_free_pages(dev, size, page);
> return NULL;
> }
> -
> +done:
> ret = page_address(page);
> if (force_dma_unencrypted(dev)) {
> set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
> @@ -185,6 +193,12 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> {
> unsigned int page_order = get_order(size);
>
> + if (force_dma_unencrypted(dev) &&
> + dma_in_atomic_pool(dev, cpu_addr, size)) {
> + dma_free_from_pool(dev, cpu_addr, size);
> + return;
> + }
> +
> if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> !force_dma_unencrypted(dev)) {
> /* cpu_addr is a struct page cookie, not a kernel address */
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -8,6 +8,7 @@
> #include <linux/dma-contiguous.h>
> #include <linux/init.h>
> #include <linux/genalloc.h>
> +#include <linux/set_memory.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
>
> @@ -100,9 +101,11 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
>
> #ifdef CONFIG_DMA_DIRECT_REMAP
> static struct gen_pool *atomic_pool __ro_after_init;
> +static struct gen_pool *atomic_pool_unencrypted __ro_after_init;
>
> #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> +static size_t atomic_pool_unencrypted_size __initdata = SZ_4M;
>
> static int __init early_coherent_pool(char *p)
> {
> @@ -120,10 +123,11 @@ static gfp_t dma_atomic_pool_gfp(void)
> return GFP_KERNEL;
> }
>
> -static int __init dma_atomic_pool_init(void)
> +static int __init __dma_atomic_pool_init(struct gen_pool **pool,
> + size_t pool_size, bool unencrypt)
> {
> - unsigned int pool_size_order = get_order(atomic_pool_size);
> - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> + unsigned int pool_size_order = get_order(pool_size);
> + unsigned long nr_pages = pool_size >> PAGE_SHIFT;
> struct page *page;
> void *addr;
> int ret;
> @@ -136,78 +140,128 @@ static int __init dma_atomic_pool_init(void)
> if (!page)
> goto out;
>
> - arch_dma_prep_coherent(page, atomic_pool_size);
> + arch_dma_prep_coherent(page, pool_size);
>
> - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!atomic_pool)
> + *pool = gen_pool_create(PAGE_SHIFT, -1);
> + if (!*pool)
> goto free_page;
>
> - addr = dma_common_contiguous_remap(page, atomic_pool_size,
> + addr = dma_common_contiguous_remap(page, pool_size,
> pgprot_dmacoherent(PAGE_KERNEL),
> __builtin_return_address(0));
> if (!addr)
> goto destroy_genpool;
>
> - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> - page_to_phys(page), atomic_pool_size, -1);
> + ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
> + pool_size, -1);
> if (ret)
> goto remove_mapping;
> - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> + gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
> + if (unencrypt)
> + set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
>
> - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> - atomic_pool_size / 1024);
> + pr_info("DMA: preallocated %zu KiB pool for atomic allocations%s\n",
> + pool_size >> 10, unencrypt ? " (unencrypted)" : "");
> return 0;
>
> remove_mapping:
> - dma_common_free_remap(addr, atomic_pool_size);
> + dma_common_free_remap(addr, pool_size);
> destroy_genpool:
> - gen_pool_destroy(atomic_pool);
> - atomic_pool = NULL;
> + gen_pool_destroy(*pool);
> + *pool = NULL;
> free_page:
> if (!dma_release_from_contiguous(NULL, page, nr_pages))
> __free_pages(page, pool_size_order);
> out:
> - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> - atomic_pool_size / 1024);
> + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation%s\n",
> + pool_size >> 10, unencrypt ? " (unencrypted)" : "");
> return -ENOMEM;
> }
> +
> +static int __init dma_atomic_pool_init(void)
> +{
> + int ret;
> +
> + ret = __dma_atomic_pool_init(&atomic_pool, atomic_pool_size, false);
> + if (ret)
> + return ret;
> + return __dma_atomic_pool_init(&atomic_pool_unencrypted,
> + atomic_pool_unencrypted_size, true);
> +}
> postcore_initcall(dma_atomic_pool_init);
>
> -bool dma_in_atomic_pool(void *start, size_t size)
> +static inline struct gen_pool *dev_to_pool(struct device *dev)
> {
> - if (unlikely(!atomic_pool))
> - return false;
> + if (force_dma_unencrypted(dev))
> + return atomic_pool_unencrypted;
> + return atomic_pool;
> +}
> +
> +bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
> +{
> + struct gen_pool *pool = dev_to_pool(dev);
>
> - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> + if (unlikely(!pool))
> + return false;
> + return addr_in_gen_pool(pool, (unsigned long)start, size);
> }
>
> -void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> +static struct gen_pool *atomic_pool __ro_after_init;
> +static size_t encrypted_pool_size;
> +static size_t encrypted_pool_size_max;
> +static spinlock_t encrypted_pool_size_lock;
> +
> +void *dma_alloc_from_pool(struct device *dev, size_t size,
> + struct page **ret_page, gfp_t flags)
> {
> + struct gen_pool *pool = dev_to_pool(dev);
> unsigned long val;
> void *ptr = NULL;
>
> - if (!atomic_pool) {
> - WARN(1, "coherent pool not initialised!\n");
> + if (!pool) {
> + WARN(1, "%scoherent pool not initialised!\n",
> + force_dma_unencrypted(dev) ? "encrypted " : "");
> return NULL;
> }
>
> - val = gen_pool_alloc(atomic_pool, size);
> + val = gen_pool_alloc(pool, size);
> if (val) {
> - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> + phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
>
> *ret_page = pfn_to_page(__phys_to_pfn(phys));
> ptr = (void *)val;
> memset(ptr, 0, size);
> + if (force_dma_unencrypted(dev)) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&encrypted_pool_size_lock, flags);
> + encrypted_pool_size += size;
> + if (encrypted_pool_size > encrypted_pool_size_max) {
> + encrypted_pool_size_max = encrypted_pool_size;
> + pr_info("max encrypted pool size now %lu\n",
> + encrypted_pool_size_max);
> + }
> + spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
> + }
> }
>
> return ptr;
> }
>
> -bool dma_free_from_pool(void *start, size_t size)
> +bool dma_free_from_pool(struct device *dev, void *start, size_t size)
> {
> - if (!dma_in_atomic_pool(start, size))
> + struct gen_pool *pool = dev_to_pool(dev);
> +
> + if (!dma_in_atomic_pool(dev, start, size))
> return false;
> - gen_pool_free(atomic_pool, (unsigned long)start, size);
> + gen_pool_free(pool, (unsigned long)start, size);
> + if (force_dma_unencrypted(dev)) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&encrypted_pool_size_lock, flags);
> + encrypted_pool_size -= size;
> + spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
> + }
> return true;
> }
>
> @@ -220,7 +274,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> size = PAGE_ALIGN(size);
>
> if (!gfpflags_allow_blocking(flags)) {
> - ret = dma_alloc_from_pool(size, &page, flags);
> + ret = dma_alloc_from_pool(dev, size, &page, flags);
> if (!ret)
> return NULL;
> goto done;
> @@ -251,7 +305,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> dma_addr_t dma_handle, unsigned long attrs)
> {
> - if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> + if (!dma_free_from_pool(dev, vaddr, PAGE_ALIGN(size))) {
> phys_addr_t phys = dma_to_phys(dev, dma_handle);
> struct page *page = pfn_to_page(__phys_to_pfn(phys));
>
On Mon, Jan 06, 2020 at 05:34:00PM +0000, Robin Murphy wrote:
> On 01/01/2020 1:54 am, David Rientjes via iommu wrote:
>> Christoph, Thomas, is something like this (without the diagnostic
>> information included in this patch) acceptable for these allocations?
>> Adding expansion support when the pool is half depleted wouldn't be *that*
>> hard.
>>
>> Or are there alternatives we should consider? Thanks!
>
> Are there any platforms which require both non-cacheable remapping *and*
> unencrypted remapping for distinct subsets of devices?
>
> If not (and I'm assuming there aren't, because otherwise this patch is
> incomplete in covering only 2 of the 3 possible combinations), then
> couldn't we keep things simpler by just attributing both properties to the
> single "atomic pool" on the basis that one or the other will always be a
> no-op? In other words, basically just tweaking the existing "!coherent"
> tests to "!coherent || force_dma_unencrypted()" and doing
> set_dma_unencrypted() unconditionally in atomic_pool_init().
I think that would make most sense.
On Tue, 7 Jan 2020, Christoph Hellwig wrote:
> > On 01/01/2020 1:54 am, David Rientjes via iommu wrote:
> >> Christoph, Thomas, is something like this (without the diagnostic
> >> information included in this patch) acceptable for these allocations?
> >> Adding expansion support when the pool is half depleted wouldn't be *that*
> >> hard.
> >>
> >> Or are there alternatives we should consider? Thanks!
> >
> > Are there any platforms which require both non-cacheable remapping *and*
> > unencrypted remapping for distinct subsets of devices?
> >
> > If not (and I'm assuming there aren't, because otherwise this patch is
> > incomplete in covering only 2 of the 3 possible combinations), then
> > couldn't we keep things simpler by just attributing both properties to the
> > single "atomic pool" on the basis that one or the other will always be a
> > no-op? In other words, basically just tweaking the existing "!coherent"
> > tests to "!coherent || force_dma_unencrypted()" and doing
> > set_dma_unencrypted() unconditionally in atomic_pool_init().
>
> I think that would make most sense.
>
I'll rely on Thomas to chime in if this doesn't make sense for the SEV
usecase.
I think the sizing of the single atomic pool needs to be determined. Our
peak usage that we have measured from NVMe is ~1.4MB and atomic_pool is
currently sized to 256KB by default. I'm unsure at this time if we need
to be able to dynamically expand this pool with a kworker.
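(For reference, the existing pool can already be sized at boot with the
coherent_pool= parameter handled by early_coherent_pool() in
kernel/dma/remap.c, e.g. coherent_pool=2M, though that only helps when the
admin knows the workload in advance.)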
Maybe when CONFIG_AMD_MEM_ENCRYPT is enabled this atomic pool should be
sized to 2MB or so and then when it reaches half capacity we schedule some
background work to dynamically increase it? That wouldn't be hard unless
the pool can be rapidly depleted.
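A rough sketch of the trigger side (the helper and work item names here are
made up, and error handling is omitted), reusing the existing genalloc
accounting:

	static void atomic_pool_expand_work(struct work_struct *work);
	static DECLARE_WORK(atomic_pool_work, atomic_pool_expand_work);

	/* called from the allocation path */
	static void atomic_pool_check_depletion(struct gen_pool *pool)
	{
		/* gen_pool_avail()/gen_pool_size() are existing helpers */
		if (gen_pool_avail(pool) < gen_pool_size(pool) / 2)
			schedule_work(&atomic_pool_work);
	}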
Do we want to increase the atomic pool size by default and then do
background dynamic expansion?
On Tue, Jan 07, 2020 at 11:57:24AM -0800, David Rientjes wrote:
> I'll rely on Thomas to chime in if this doesn't make sense for the SEV
> usecase.
>
> I think the sizing of the single atomic pool needs to be determined. Our
> peak usage that we have measured from NVMe is ~1.4MB and atomic_pool is
> currently sized to 256KB by default. I'm unsure at this time if we need
> to be able to dynamically expand this pool with a kworker.
>
> Maybe when CONFIG_AMD_MEM_ENCRYPT is enabled this atomic pool should be
> sized to 2MB or so and then when it reaches half capacity we schedule some
> background work to dynamically increase it? That wouldn't be hard unless
> the pool can be rapidly depleted.
>
Note that a non-coherent architecture with the same workload would need
the same size.
> Do we want to increase the atomic pool size by default and then do
> background dynamic expansion?
For now I'd just scale with system memory size.
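Something along these lines, perhaps (sketch only; the helper name and the
128 KiB per GiB ratio are just placeholders):

	#include <linux/mm.h>		/* totalram_pages() */
	#include <linux/sizes.h>

	static size_t __init atomic_pool_default_size(void)
	{
		/* ~128 KiB of pool per GiB of RAM, clamped to a sane range */
		unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K);

		pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES);
		return max_t(size_t, pages << PAGE_SHIFT, SZ_128K);
	}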
On Thu, 9 Jan 2020, Christoph Hellwig wrote:
> > I'll rely on Thomas to chime in if this doesn't make sense for the SEV
> > usecase.
> >
> > I think the sizing of the single atomic pool needs to be determined. Our
> > peak usage that we have measured from NVMe is ~1.4MB and atomic_pool is
> > currently sized to 256KB by default. I'm unsure at this time if we need
> > to be able to dynamically expand this pool with a kworker.
> >
> > Maybe when CONFIG_AMD_MEM_ENCRYPT is enabled this atomic pool should be
> > sized to 2MB or so and then when it reaches half capacity we schedule some
> > background work to dynamically increase it? That wouldn't be hard unless
> > the pool can be rapidly depleted.
> >
>
> Note that a non-coherent architecture with the same workload would need
> the same size.
>
> > Do we want to increase the atomic pool size by default and then do
> > background dynamic expansion?
>
> For now I'd just scale with system memory size.
>
Thanks Christoph and Robin for the help, we're running some additional
stress tests to double check that our required amount of memory from this
pool is accurate. Once that's done, I'll refresh the patch with the
suggestions and propose it formally.
On 12/31/19 7:54 PM, David Rientjes wrote:
> Christoph, Thomas, is something like this (without the diagnostic
> information included in this patch) acceptable for these allocations?
> Adding expansion support when the pool is half depleted wouldn't be *that*
> hard.
Sorry for the delay in responding... Overall, I think this will work
well. If you want the default size to be adjustable, you can always go
with a Kconfig setting or a command line parameter or both (I realize the
command line parameter is not optimal).
Just a couple of overall comments about the use of variable names and
messages using both unencrypted and encrypted, I think the use should be
consistent throughout the patch.
Thanks,
Tom
>
> Or are there alternatives we should consider? Thanks!
>
>
>
>
> When AMD SEV is enabled in the guest, all allocations through
> dma_pool_alloc_page() must call set_memory_decrypted() for unencrypted
> DMA. This includes dma_pool_alloc() and dma_direct_alloc_pages(). These
> calls may block which is not allowed in atomic allocation contexts such as
> from the NVMe driver.
>
> Preallocate a complementary unencrypted DMA atomic pool that is initially
> 4MB in size. This patch does not contain dynamic expansion, but that
> could be added if necessary.
>
> In our stress testing, our peak unencrypted DMA atomic allocation
> requirement is ~1.4MB, so 4MB is plenty. This pool is similar to the
> existing DMA atomic pool but is unencrypted.
>
> Signed-off-by: David Rientjes <[email protected]>
> ---
> Based on v5.4 HEAD.
>
> This commit contains diagnostic information and is not intended for use
> in a production environment.
>
> arch/x86/Kconfig | 1 +
> drivers/iommu/dma-iommu.c | 5 +-
> include/linux/dma-mapping.h | 7 ++-
> kernel/dma/direct.c | 16 ++++-
> kernel/dma/remap.c | 116 ++++++++++++++++++++++++++----------
> 5 files changed, 108 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1530,6 +1530,7 @@ config X86_CPA_STATISTICS
> config AMD_MEM_ENCRYPT
> bool "AMD Secure Memory Encryption (SME) support"
> depends on X86_64 && CPU_SUP_AMD
> + select DMA_DIRECT_REMAP
> select DYNAMIC_PHYSICAL_MASK
> select ARCH_USE_MEMREMAP_PROT
> select ARCH_HAS_FORCE_DMA_UNENCRYPTED
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -928,7 +928,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
>
> /* Non-coherent atomic allocation? Easy */
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> - dma_free_from_pool(cpu_addr, alloc_size))
> + dma_free_from_pool(dev, cpu_addr, alloc_size))
> return;
>
> if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
> @@ -1011,7 +1011,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
>
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> !gfpflags_allow_blocking(gfp) && !coherent)
> - cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
> + cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
> + gfp);
> else
> cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
> if (!cpu_addr)
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -629,9 +629,10 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
> pgprot_t prot, const void *caller);
> void dma_common_free_remap(void *cpu_addr, size_t size);
>
> -bool dma_in_atomic_pool(void *start, size_t size);
> -void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
> -bool dma_free_from_pool(void *start, size_t size);
> +bool dma_in_atomic_pool(struct device *dev, void *start, size_t size);
> +void *dma_alloc_from_pool(struct device *dev, size_t size,
> + struct page **ret_page, gfp_t flags);
> +bool dma_free_from_pool(struct device *dev, void *start, size_t size);
>
> int
> dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -10,6 +10,7 @@
> #include <linux/dma-direct.h>
> #include <linux/scatterlist.h>
> #include <linux/dma-contiguous.h>
> +#include <linux/dma-mapping.h>
> #include <linux/dma-noncoherent.h>
> #include <linux/pfn.h>
> #include <linux/set_memory.h>
> @@ -131,6 +132,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> struct page *page;
> void *ret;
>
> + if (!gfpflags_allow_blocking(gfp) && force_dma_unencrypted(dev)) {
> + ret = dma_alloc_from_pool(dev, size, &page, gfp);
> + if (!ret)
> + return NULL;
> + goto done;
> + }
> +
> page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
> if (!page)
> return NULL;
> @@ -156,7 +164,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> __dma_direct_free_pages(dev, size, page);
> return NULL;
> }
> -
> +done:
> ret = page_address(page);
> if (force_dma_unencrypted(dev)) {
> set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
> @@ -185,6 +193,12 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> {
> unsigned int page_order = get_order(size);
>
> + if (force_dma_unencrypted(dev) &&
> + dma_in_atomic_pool(dev, cpu_addr, size)) {
> + dma_free_from_pool(dev, cpu_addr, size);
> + return;
> + }
> +
> if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> !force_dma_unencrypted(dev)) {
> /* cpu_addr is a struct page cookie, not a kernel address */
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -8,6 +8,7 @@
> #include <linux/dma-contiguous.h>
> #include <linux/init.h>
> #include <linux/genalloc.h>
> +#include <linux/set_memory.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
>
> @@ -100,9 +101,11 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
>
> #ifdef CONFIG_DMA_DIRECT_REMAP
> static struct gen_pool *atomic_pool __ro_after_init;
> +static struct gen_pool *atomic_pool_unencrypted __ro_after_init;
>
> #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> +static size_t atomic_pool_unencrypted_size __initdata = SZ_4M;
>
> static int __init early_coherent_pool(char *p)
> {
> @@ -120,10 +123,11 @@ static gfp_t dma_atomic_pool_gfp(void)
> return GFP_KERNEL;
> }
>
> -static int __init dma_atomic_pool_init(void)
> +static int __init __dma_atomic_pool_init(struct gen_pool **pool,
> + size_t pool_size, bool unencrypt)
> {
> - unsigned int pool_size_order = get_order(atomic_pool_size);
> - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> + unsigned int pool_size_order = get_order(pool_size);
> + unsigned long nr_pages = pool_size >> PAGE_SHIFT;
> struct page *page;
> void *addr;
> int ret;
> @@ -136,78 +140,128 @@ static int __init dma_atomic_pool_init(void)
> if (!page)
> goto out;
>
> - arch_dma_prep_coherent(page, atomic_pool_size);
> + arch_dma_prep_coherent(page, pool_size);
>
> - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!atomic_pool)
> + *pool = gen_pool_create(PAGE_SHIFT, -1);
> + if (!*pool)
> goto free_page;
>
> - addr = dma_common_contiguous_remap(page, atomic_pool_size,
> + addr = dma_common_contiguous_remap(page, pool_size,
> pgprot_dmacoherent(PAGE_KERNEL),
> __builtin_return_address(0));
> if (!addr)
> goto destroy_genpool;
>
> - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> - page_to_phys(page), atomic_pool_size, -1);
> + ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
> + pool_size, -1);
> if (ret)
> goto remove_mapping;
> - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> + gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
> + if (unencrypt)
> + set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
>
> - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> - atomic_pool_size / 1024);
> + pr_info("DMA: preallocated %zu KiB pool for atomic allocations%s\n",
> + pool_size >> 10, unencrypt ? " (unencrypted)" : "");
> return 0;
>
> remove_mapping:
> - dma_common_free_remap(addr, atomic_pool_size);
> + dma_common_free_remap(addr, pool_size);
> destroy_genpool:
> - gen_pool_destroy(atomic_pool);
> - atomic_pool = NULL;
> + gen_pool_destroy(*pool);
> + *pool = NULL;
> free_page:
> if (!dma_release_from_contiguous(NULL, page, nr_pages))
> __free_pages(page, pool_size_order);
> out:
> - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> - atomic_pool_size / 1024);
> + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation%s\n",
> + pool_size >> 10, unencrypt ? " (unencrypted)" : "");
> return -ENOMEM;
> }
> +
> +static int __init dma_atomic_pool_init(void)
> +{
> + int ret;
> +
> + ret = __dma_atomic_pool_init(&atomic_pool, atomic_pool_size, false);
> + if (ret)
> + return ret;
> + return __dma_atomic_pool_init(&atomic_pool_unencrypted,
> + atomic_pool_unencrypted_size, true);
> +}
> postcore_initcall(dma_atomic_pool_init);
>
> -bool dma_in_atomic_pool(void *start, size_t size)
> +static inline struct gen_pool *dev_to_pool(struct device *dev)
> {
> - if (unlikely(!atomic_pool))
> - return false;
> + if (force_dma_unencrypted(dev))
> + return atomic_pool_unencrypted;
> + return atomic_pool;
> +}
> +
> +bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
> +{
> + struct gen_pool *pool = dev_to_pool(dev);
>
> - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> + if (unlikely(!pool))
> + return false;
> + return addr_in_gen_pool(pool, (unsigned long)start, size);
> }
>
> -void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> +static struct gen_pool *atomic_pool __ro_after_init;
> +static size_t encrypted_pool_size;
> +static size_t encrypted_pool_size_max;
> +static spinlock_t encrypted_pool_size_lock;
> +
> +void *dma_alloc_from_pool(struct device *dev, size_t size,
> + struct page **ret_page, gfp_t flags)
> {
> + struct gen_pool *pool = dev_to_pool(dev);
> unsigned long val;
> void *ptr = NULL;
>
> - if (!atomic_pool) {
> - WARN(1, "coherent pool not initialised!\n");
> + if (!pool) {
> + WARN(1, "%scoherent pool not initialised!\n",
> + force_dma_unencrypted(dev) ? "encrypted " : "");
> return NULL;
> }
>
> - val = gen_pool_alloc(atomic_pool, size);
> + val = gen_pool_alloc(pool, size);
> if (val) {
> - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> + phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
>
> *ret_page = pfn_to_page(__phys_to_pfn(phys));
> ptr = (void *)val;
> memset(ptr, 0, size);
> + if (force_dma_unencrypted(dev)) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&encrypted_pool_size_lock, flags);
> + encrypted_pool_size += size;
> + if (encrypted_pool_size > encrypted_pool_size_max) {
> + encrypted_pool_size_max = encrypted_pool_size;
> + pr_info("max encrypted pool size now %lu\n",
> + encrypted_pool_size_max);
> + }
> + spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
> + }
> }
>
> return ptr;
> }
>
> -bool dma_free_from_pool(void *start, size_t size)
> +bool dma_free_from_pool(struct device *dev, void *start, size_t size)
> {
> - if (!dma_in_atomic_pool(start, size))
> + struct gen_pool *pool = dev_to_pool(dev);
> +
> + if (!dma_in_atomic_pool(dev, start, size))
> return false;
> - gen_pool_free(atomic_pool, (unsigned long)start, size);
> + gen_pool_free(pool, (unsigned long)start, size);
> + if (force_dma_unencrypted(dev)) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&encrypted_pool_size_lock, flags);
> + encrypted_pool_size -= size;
> + spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
> + }
> return true;
> }
>
> @@ -220,7 +274,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> size = PAGE_ALIGN(size);
>
> if (!gfpflags_allow_blocking(flags)) {
> - ret = dma_alloc_from_pool(size, &page, flags);
> + ret = dma_alloc_from_pool(dev, size, &page, flags);
> if (!ret)
> return NULL;
> goto done;
> @@ -251,7 +305,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> dma_addr_t dma_handle, unsigned long attrs)
> {
> - if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> + if (!dma_free_from_pool(dev, vaddr, PAGE_ALIGN(size))) {
> phys_addr_t phys = dma_to_phys(dev, dma_handle);
> struct page *page = pfn_to_page(__phys_to_pfn(phys));
>
>
On Fri, 17 Jan 2020, Tom Lendacky wrote:
> On 12/31/19 7:54 PM, David Rientjes wrote:
> > Christoph, Thomas, is something like this (without the diagnostic
> > information included in this patch) acceptable for these allocations?
> > Adding expansion support when the pool is half depleted wouldn't be *that*
> > hard.
>
> Sorry for the delay in responding... Overall, I think this will work
> well. If you want the default size to be adjustable, you can always go
> with a Kconfig setting or a command line parameter or both (I realize the
> command line parameter is not optimal).
>
Ok, so we've determined that the unencrypted, non-blocking allocations we
need through the DMA API depend not only on GFP_DMA memory but also on
GFP_KERNEL. We don't have a dependency on GFP_DMA32 memory (yet) but
should likely provision for it.
So my patch would change by actually providing three separate pools, one
for ZONE_DMA, one for ZONE_DMA32, and one for ZONE_NORMAL. The ZONE_DMA
pool already exists in the form of the atomic_pool, so this would add two
additional pools that likely start out at the same size and dynamically
expand with a kworker when usage approaches the limits of the pool. I
don't necessarily like needing three separate pools, but I can't
think of a better way to provide unencrypted memory for non-blocking
allocations that work for all possible device gfp masks.
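For the pool selection itself, I'm thinking of something like the following
(sketch only: the pool names are hypothetical and the 24-bit ZONE_DMA limit
is the x86 value):

	static struct gen_pool *atomic_pool_dma __ro_after_init;
	static struct gen_pool *atomic_pool_dma32 __ro_after_init;
	static struct gen_pool *atomic_pool_kernel __ro_after_init;

	static struct gen_pool *dev_to_atomic_pool(struct device *dev)
	{
		u64 mask = dev->coherent_dma_mask;

		if (IS_ENABLED(CONFIG_ZONE_DMA) && mask <= DMA_BIT_MASK(24))
			return atomic_pool_dma;
		if (IS_ENABLED(CONFIG_ZONE_DMA32) && mask <= DMA_BIT_MASK(32))
			return atomic_pool_dma32;
		return atomic_pool_kernel;
	}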
My idea right now is to create all three pools instead of the single
atomic_pool, all 256KB in size, and anytime their usage reaches half their
limit, we kick off some background work to double the size of the pool
with GFP_KERNEL | __GFP_NORETRY. Our experimentation suggests that a
kworker would actually respond in time for this.
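The expansion side of the worker could look roughly like this (sketch only:
no CMA or vmalloc remap, error handling trimmed, helper name made up):

	static void atomic_pool_expand(struct gen_pool *pool, size_t min_grow,
				       gfp_t gfp)
	{
		unsigned int order = get_order(min_grow);
		size_t size = PAGE_SIZE << order;
		struct page *page;
		void *addr;

		page = alloc_pages(gfp | __GFP_NORETRY, order);
		if (!page)
			return;

		addr = page_address(page);
		set_memory_decrypted((unsigned long)addr, 1 << order);
		if (gen_pool_add_virt(pool, (unsigned long)addr,
				      page_to_phys(page), size,
				      NUMA_NO_NODE)) {
			set_memory_encrypted((unsigned long)addr, 1 << order);
			__free_pages(page, order);
		}
	}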
Any objections to this approach? If so, an alternative suggestion would
be appreciated :) I plan for all atomic pools to be unencrypted at the
time the allocation is successful, unless there is some strange need for
non-blocking atomic allocations through the DMA API that must remain
encrypted.
> Just a couple of overall comments about the use of variable names and
> messages using both unencrypted and encrypted, I think the use should be
> consistent throughout the patch.
>
> Thanks,
> Tom
>
> >
> > Or are there alternatives we should consider? Thanks!
> >
> >
> >
> >
> > When AMD SEV is enabled in the guest, all allocations through
> > dma_pool_alloc_page() must call set_memory_decrypted() for unencrypted
> > DMA. This includes dma_pool_alloc() and dma_direct_alloc_pages(). These
> > calls may block which is not allowed in atomic allocation contexts such as
> > from the NVMe driver.
> >
> > Preallocate a complementary unencrypted DMA atomic pool that is initially
> > 4MB in size. This patch does not contain dynamic expansion, but that
> > could be added if necessary.
> >
> > In our stress testing, our peak unencrypted DMA atomic allocation
> > requirement is ~1.4MB, so 4MB is plenty. This pool is similar to the
> > existing DMA atomic pool but is unencrypted.
> >
> > Signed-off-by: David Rientjes <[email protected]>
> > ---
> > Based on v5.4 HEAD.
> >
> > This commit contains diagnostic information and is not intended for use
> > in a production environment.
> >
> > arch/x86/Kconfig | 1 +
> > drivers/iommu/dma-iommu.c | 5 +-
> > include/linux/dma-mapping.h | 7 ++-
> > kernel/dma/direct.c | 16 ++++-
> > kernel/dma/remap.c | 116 ++++++++++++++++++++++++++----------
> > 5 files changed, 108 insertions(+), 37 deletions(-)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -1530,6 +1530,7 @@ config X86_CPA_STATISTICS
> > config AMD_MEM_ENCRYPT
> > bool "AMD Secure Memory Encryption (SME) support"
> > depends on X86_64 && CPU_SUP_AMD
> > + select DMA_DIRECT_REMAP
> > select DYNAMIC_PHYSICAL_MASK
> > select ARCH_USE_MEMREMAP_PROT
> > select ARCH_HAS_FORCE_DMA_UNENCRYPTED
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -928,7 +928,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
> >
> > /* Non-coherent atomic allocation? Easy */
> > if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > - dma_free_from_pool(cpu_addr, alloc_size))
> > + dma_free_from_pool(dev, cpu_addr, alloc_size))
> > return;
> >
> > if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
> > @@ -1011,7 +1011,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
> >
> > if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > !gfpflags_allow_blocking(gfp) && !coherent)
> > - cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
> > + cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
> > + gfp);
> > else
> > cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
> > if (!cpu_addr)
> > diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> > --- a/include/linux/dma-mapping.h
> > +++ b/include/linux/dma-mapping.h
> > @@ -629,9 +629,10 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
> > pgprot_t prot, const void *caller);
> > void dma_common_free_remap(void *cpu_addr, size_t size);
> >
> > -bool dma_in_atomic_pool(void *start, size_t size);
> > -void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
> > -bool dma_free_from_pool(void *start, size_t size);
> > +bool dma_in_atomic_pool(struct device *dev, void *start, size_t size);
> > +void *dma_alloc_from_pool(struct device *dev, size_t size,
> > + struct page **ret_page, gfp_t flags);
> > +bool dma_free_from_pool(struct device *dev, void *start, size_t size);
> >
> > int
> > dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
> > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> > --- a/kernel/dma/direct.c
> > +++ b/kernel/dma/direct.c
> > @@ -10,6 +10,7 @@
> > #include <linux/dma-direct.h>
> > #include <linux/scatterlist.h>
> > #include <linux/dma-contiguous.h>
> > +#include <linux/dma-mapping.h>
> > #include <linux/dma-noncoherent.h>
> > #include <linux/pfn.h>
> > #include <linux/set_memory.h>
> > @@ -131,6 +132,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> > struct page *page;
> > void *ret;
> >
> > + if (!gfpflags_allow_blocking(gfp) && force_dma_unencrypted(dev)) {
> > + ret = dma_alloc_from_pool(dev, size, &page, gfp);
> > + if (!ret)
> > + return NULL;
> > + goto done;
> > + }
> > +
> > page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
> > if (!page)
> > return NULL;
> > @@ -156,7 +164,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> > __dma_direct_free_pages(dev, size, page);
> > return NULL;
> > }
> > -
> > +done:
> > ret = page_address(page);
> > if (force_dma_unencrypted(dev)) {
> > set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
> > @@ -185,6 +193,12 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> > {
> > unsigned int page_order = get_order(size);
> >
> > + if (force_dma_unencrypted(dev) &&
> > + dma_in_atomic_pool(dev, cpu_addr, size)) {
> > + dma_free_from_pool(dev, cpu_addr, size);
> > + return;
> > + }
> > +
> > if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> > !force_dma_unencrypted(dev)) {
> > /* cpu_addr is a struct page cookie, not a kernel address */
> > diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> > --- a/kernel/dma/remap.c
> > +++ b/kernel/dma/remap.c
> > @@ -8,6 +8,7 @@
> > #include <linux/dma-contiguous.h>
> > #include <linux/init.h>
> > #include <linux/genalloc.h>
> > +#include <linux/set_memory.h>
> > #include <linux/slab.h>
> > #include <linux/vmalloc.h>
> >
> > @@ -100,9 +101,11 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
> >
> > #ifdef CONFIG_DMA_DIRECT_REMAP
> > static struct gen_pool *atomic_pool __ro_after_init;
> > +static struct gen_pool *atomic_pool_unencrypted __ro_after_init;
> >
> > #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> > static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> > +static size_t atomic_pool_unencrypted_size __initdata = SZ_4M;
> >
> > static int __init early_coherent_pool(char *p)
> > {
> > @@ -120,10 +123,11 @@ static gfp_t dma_atomic_pool_gfp(void)
> > return GFP_KERNEL;
> > }
> >
> > -static int __init dma_atomic_pool_init(void)
> > +static int __init __dma_atomic_pool_init(struct gen_pool **pool,
> > + size_t pool_size, bool unencrypt)
> > {
> > - unsigned int pool_size_order = get_order(atomic_pool_size);
> > - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> > + unsigned int pool_size_order = get_order(pool_size);
> > + unsigned long nr_pages = pool_size >> PAGE_SHIFT;
> > struct page *page;
> > void *addr;
> > int ret;
> > @@ -136,78 +140,128 @@ static int __init dma_atomic_pool_init(void)
> > if (!page)
> > goto out;
> >
> > - arch_dma_prep_coherent(page, atomic_pool_size);
> > + arch_dma_prep_coherent(page, pool_size);
> >
> > - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> > - if (!atomic_pool)
> > + *pool = gen_pool_create(PAGE_SHIFT, -1);
> > + if (!*pool)
> > goto free_page;
> >
> > - addr = dma_common_contiguous_remap(page, atomic_pool_size,
> > + addr = dma_common_contiguous_remap(page, pool_size,
> > pgprot_dmacoherent(PAGE_KERNEL),
> > __builtin_return_address(0));
> > if (!addr)
> > goto destroy_genpool;
> >
> > - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> > - page_to_phys(page), atomic_pool_size, -1);
> > + ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
> > + pool_size, -1);
> > if (ret)
> > goto remove_mapping;
> > - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> > + gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
> > + if (unencrypt)
> > + set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
> >
> > - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> > - atomic_pool_size / 1024);
> > + pr_info("DMA: preallocated %zu KiB pool for atomic allocations%s\n",
> > + pool_size >> 10, unencrypt ? " (unencrypted)" : "");
> > return 0;
> >
> > remove_mapping:
> > - dma_common_free_remap(addr, atomic_pool_size);
> > + dma_common_free_remap(addr, pool_size);
> > destroy_genpool:
> > - gen_pool_destroy(atomic_pool);
> > - atomic_pool = NULL;
> > + gen_pool_destroy(*pool);
> > + *pool = NULL;
> > free_page:
> > if (!dma_release_from_contiguous(NULL, page, nr_pages))
> > __free_pages(page, pool_size_order);
> > out:
> > - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> > - atomic_pool_size / 1024);
> > + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation%s\n",
> > + pool_size >> 10, unencrypt ? " (unencrypted)" : "");
> > return -ENOMEM;
> > }
> > +
> > +static int __init dma_atomic_pool_init(void)
> > +{
> > + int ret;
> > +
> > + ret = __dma_atomic_pool_init(&atomic_pool, atomic_pool_size, false);
> > + if (ret)
> > + return ret;
> > + return __dma_atomic_pool_init(&atomic_pool_unencrypted,
> > + atomic_pool_unencrypted_size, true);
> > +}
> > postcore_initcall(dma_atomic_pool_init);
> >
> > -bool dma_in_atomic_pool(void *start, size_t size)
> > +static inline struct gen_pool *dev_to_pool(struct device *dev)
> > {
> > - if (unlikely(!atomic_pool))
> > - return false;
> > + if (force_dma_unencrypted(dev))
> > + return atomic_pool_unencrypted;
> > + return atomic_pool;
> > +}
> > +
> > +bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
> > +{
> > + struct gen_pool *pool = dev_to_pool(dev);
> >
> > - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> > + if (unlikely(!pool))
> > + return false;
> > + return addr_in_gen_pool(pool, (unsigned long)start, size);
> > }
> >
> > -void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> > +static struct gen_pool *atomic_pool __ro_after_init;
> > +static size_t encrypted_pool_size;
> > +static size_t encrypted_pool_size_max;
> > +static spinlock_t encrypted_pool_size_lock;
> > +
> > +void *dma_alloc_from_pool(struct device *dev, size_t size,
> > + struct page **ret_page, gfp_t flags)
> > {
> > + struct gen_pool *pool = dev_to_pool(dev);
> > unsigned long val;
> > void *ptr = NULL;
> >
> > - if (!atomic_pool) {
> > - WARN(1, "coherent pool not initialised!\n");
> > + if (!pool) {
> > + WARN(1, "%scoherent pool not initialised!\n",
> > + force_dma_unencrypted(dev) ? "encrypted " : "");
> > return NULL;
> > }
> >
> > - val = gen_pool_alloc(atomic_pool, size);
> > + val = gen_pool_alloc(pool, size);
> > if (val) {
> > - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> > + phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
> >
> > *ret_page = pfn_to_page(__phys_to_pfn(phys));
> > ptr = (void *)val;
> > memset(ptr, 0, size);
> > + if (force_dma_unencrypted(dev)) {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&encrypted_pool_size_lock, flags);
> > + encrypted_pool_size += size;
> > + if (encrypted_pool_size > encrypted_pool_size_max) {
> > + encrypted_pool_size_max = encrypted_pool_size;
> > + pr_info("max encrypted pool size now %lu\n",
> > + encrypted_pool_size_max);
> > + }
> > + spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
> > + }
> > }
> >
> > return ptr;
> > }
> >
> > -bool dma_free_from_pool(void *start, size_t size)
> > +bool dma_free_from_pool(struct device *dev, void *start, size_t size)
> > {
> > - if (!dma_in_atomic_pool(start, size))
> > + struct gen_pool *pool = dev_to_pool(dev);
> > +
> > + if (!dma_in_atomic_pool(dev, start, size))
> > return false;
> > - gen_pool_free(atomic_pool, (unsigned long)start, size);
> > + gen_pool_free(pool, (unsigned long)start, size);
> > + if (force_dma_unencrypted(dev)) {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&encrypted_pool_size_lock, flags);
> > + encrypted_pool_size -= size;
> > + spin_unlock_irqrestore(&encrypted_pool_size_lock, flags);
> > + }
> > return true;
> > }
> >
> > @@ -220,7 +274,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> > size = PAGE_ALIGN(size);
> >
> > if (!gfpflags_allow_blocking(flags)) {
> > - ret = dma_alloc_from_pool(size, &page, flags);
> > + ret = dma_alloc_from_pool(dev, size, &page, flags);
> > if (!ret)
> > return NULL;
> > goto done;
> > @@ -251,7 +305,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> > void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> > dma_addr_t dma_handle, unsigned long attrs)
> > {
> > - if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> > + if (!dma_free_from_pool(dev, vaddr, PAGE_ALIGN(size))) {
> > phys_addr_t phys = dma_to_phys(dev, dma_handle);
> > struct page *page = pfn_to_page(__phys_to_pfn(phys));
> >
> >
>
set_memory_decrypted() may block, so it is not possible to do non-blocking
allocations through the DMA API for devices that require unencrypted
memory.
The solution is to expand the atomic DMA pools for the various possible
gfp requirements as a means to prevent an unnecessary depletion of lowmem.
These atomic DMA pools are kept unencrypted so they can immediately be
used for non-blocking allocations. Since the need for this type of memory
depends on the kernel config and devices being used, these pools are also
dynamically expandable.
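Roughly, the non-blocking allocation path then looks like this (a condensed
sketch of what the later patches in this series wire up, not a literal hunk;
the helper name here is illustrative):

/*
 * Sketch: pick the unencrypted pool matching the device's coherent mask,
 * allocate without sleeping, and let a kworker grow the pool in the
 * background when it runs low.
 */
static void *atomic_pool_alloc(struct device *dev, size_t size,
			       struct page **ret_page)
{
	struct gen_pool *pool = dev_to_pool(dev);	/* per-gfp pool */
	unsigned long val;
	phys_addr_t phys;

	val = gen_pool_alloc(pool, size);		/* never sleeps */
	if (!val)
		return NULL;

	phys = gen_pool_virt_to_phys(pool, val);
	*ret_page = pfn_to_page(__phys_to_pfn(phys));
	memset((void *)val, 0, size);

	if (gen_pool_avail(pool) < atomic_pool_size)	/* running low */
		schedule_work(&atomic_pool_work);	/* expand later */
	return (void *)val;
}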
Some of these patches could be squashed into each other, but they were
separated out to ease code review.
This patchset is based on 5.6-rc4.
---
arch/x86/Kconfig | 1 +
drivers/iommu/dma-iommu.c | 5 +-
include/linux/dma-direct.h | 2 +
include/linux/dma-mapping.h | 6 +-
kernel/dma/direct.c | 17 ++--
kernel/dma/remap.c | 173 +++++++++++++++++++++++++-----------
6 files changed, 140 insertions(+), 64 deletions(-)
This augments the dma_{alloc,free}_from_pool() functions with a pointer
to the struct device of the allocation. This introduces no functional
change and will be used later to determine the optimal gfp mask to
allocate memory from.
dma_in_atomic_pool() is not used outside kernel/dma/remap.c, so remove
its declaration from the header file.
Signed-off-by: David Rientjes <[email protected]>
---
drivers/iommu/dma-iommu.c | 5 +++--
include/linux/dma-mapping.h | 6 +++---
kernel/dma/direct.c | 4 ++--
kernel/dma/remap.c | 9 +++++----
4 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -952,7 +952,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
/* Non-coherent atomic allocation? Easy */
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_free_from_pool(cpu_addr, alloc_size))
+ dma_free_from_pool(dev, cpu_addr, alloc_size))
return;
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
@@ -1035,7 +1035,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
!gfpflags_allow_blocking(gfp) && !coherent)
- cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
+ cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
+ gfp);
else
cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
if (!cpu_addr)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -630,9 +630,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size);
-bool dma_in_atomic_pool(void *start, size_t size);
-void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
-bool dma_free_from_pool(void *start, size_t size);
+void *dma_alloc_from_pool(struct device *dev, size_t size,
+ struct page **ret_page, gfp_t flags);
+bool dma_free_from_pool(struct device *dev, void *start, size_t size);
int
dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -127,7 +127,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_alloc_need_uncached(dev, attrs) &&
!gfpflags_allow_blocking(gfp)) {
- ret = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
+ ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
if (!ret)
return NULL;
goto done;
@@ -210,7 +210,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
}
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_free_from_pool(cpu_addr, PAGE_ALIGN(size)))
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
return;
if (force_dma_unencrypted(dev))
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -173,7 +173,7 @@ static int __init dma_atomic_pool_init(void)
}
postcore_initcall(dma_atomic_pool_init);
-bool dma_in_atomic_pool(void *start, size_t size)
+static bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
{
if (unlikely(!atomic_pool))
return false;
@@ -181,7 +181,8 @@ bool dma_in_atomic_pool(void *start, size_t size)
return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}
-void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
+void *dma_alloc_from_pool(struct device *dev, size_t size,
+ struct page **ret_page, gfp_t flags)
{
unsigned long val;
void *ptr = NULL;
@@ -203,9 +204,9 @@ void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
return ptr;
}
-bool dma_free_from_pool(void *start, size_t size)
+bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{
- if (!dma_in_atomic_pool(start, size))
+ if (!dma_in_atomic_pool(dev, start, size))
return false;
gen_pool_free(atomic_pool, (unsigned long)start, size);
return true;
The single atomic pool is allocated from the lowest zone possible since
it is guaranteed to be applicable for any DMA allocation.
Devices may allocate through the DMA API but not have a strict reliance
on GFP_DMA memory. Since the atomic pool will be used for all
non-blockable allocations, returning all memory from ZONE_DMA may
unnecessarily deplete the zone.
Provision for multiple atomic pools that will map to the optimal gfp
mask of the device. These will be wired up in a subsequent patch.
Signed-off-by: David Rientjes <[email protected]>
---
kernel/dma/remap.c | 75 +++++++++++++++++++++++++++-------------------
1 file changed, 45 insertions(+), 30 deletions(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -100,6 +100,8 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
#ifdef CONFIG_DMA_DIRECT_REMAP
static struct gen_pool *atomic_pool __ro_after_init;
+static struct gen_pool *atomic_pool_dma32 __ro_after_init;
+static struct gen_pool *atomic_pool_normal __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
@@ -111,66 +113,79 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
-static gfp_t dma_atomic_pool_gfp(void)
+static int __init __dma_atomic_pool_init(struct gen_pool **pool,
+ size_t pool_size, gfp_t gfp)
{
- if (IS_ENABLED(CONFIG_ZONE_DMA))
- return GFP_DMA;
- if (IS_ENABLED(CONFIG_ZONE_DMA32))
- return GFP_DMA32;
- return GFP_KERNEL;
-}
-
-static int __init dma_atomic_pool_init(void)
-{
- unsigned int pool_size_order = get_order(atomic_pool_size);
- unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+ const unsigned int order = get_order(pool_size);
+ const unsigned long nr_pages = pool_size >> PAGE_SHIFT;
struct page *page;
void *addr;
int ret;
if (dev_get_cma_area(NULL))
- page = dma_alloc_from_contiguous(NULL, nr_pages,
- pool_size_order, false);
+ page = dma_alloc_from_contiguous(NULL, nr_pages, order, false);
else
- page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
+ page = alloc_pages(gfp, order);
if (!page)
goto out;
- arch_dma_prep_coherent(page, atomic_pool_size);
+ arch_dma_prep_coherent(page, pool_size);
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
+ *pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!*pool)
goto free_page;
- addr = dma_common_contiguous_remap(page, atomic_pool_size,
+ addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
goto destroy_genpool;
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
- page_to_phys(page), atomic_pool_size, -1);
+ ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
+ pool_size, -1);
if (ret)
goto remove_mapping;
- gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
+ gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
- pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
- atomic_pool_size / 1024);
+ pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
+ pool_size >> 10, &gfp);
return 0;
remove_mapping:
- dma_common_free_remap(addr, atomic_pool_size);
+ dma_common_free_remap(addr, pool_size);
destroy_genpool:
- gen_pool_destroy(atomic_pool);
- atomic_pool = NULL;
+ gen_pool_destroy(*pool);
+ *pool = NULL;
free_page:
if (!dma_release_from_contiguous(NULL, page, nr_pages))
- __free_pages(page, pool_size_order);
+ __free_pages(page, order);
out:
- pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
- atomic_pool_size / 1024);
+ pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
+ atomic_pool_size >> 10, &gfp);
return -ENOMEM;
}
+
+static int __init dma_atomic_pool_init(void)
+{
+ int ret = 0;
+ int err;
+
+ ret = __dma_atomic_pool_init(&atomic_pool_normal, atomic_pool_size,
+ GFP_KERNEL);
+ if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ err = __dma_atomic_pool_init(&atomic_pool, atomic_pool_size,
+ GFP_DMA);
+ if (!ret && err)
+ ret = err;
+ }
+ if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ err = __dma_atomic_pool_init(&atomic_pool_dma32,
+ atomic_pool_size, GFP_DMA32);
+ if (!ret && err)
+ ret = err;
+ }
+ return ret;
+}
postcore_initcall(dma_atomic_pool_init);
static bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
When an atomic pool becomes fully depleted because it is now relied upon
for all non-blocking allocations through the DMA API, allow background
expansion of each pool by a kworker.
When an atomic pool has less than the default size of memory left, kick
off a kworker to dynamically expand the pool in the background. The pool
is doubled in size.
This allows the default size to be kept quite low when one or more of the
atomic pools is not used.
Also switch over some node ids to the more appropriate NUMA_NO_NODE.
Signed-off-by: David Rientjes <[email protected]>
---
kernel/dma/remap.c | 79 ++++++++++++++++++++++++++++++++++------------
1 file changed, 58 insertions(+), 21 deletions(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -10,6 +10,7 @@
#include <linux/genalloc.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
struct page **dma_common_find_pages(void *cpu_addr)
{
@@ -104,7 +105,10 @@ static struct gen_pool *atomic_pool_dma32 __ro_after_init;
static struct gen_pool *atomic_pool_normal __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
-static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+/* Dynamic background expansion when the atomic pool is near capacity */
+struct work_struct atomic_pool_work;
static int __init early_coherent_pool(char *p)
{
@@ -113,14 +117,14 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
-static int __init __dma_atomic_pool_init(struct gen_pool **pool,
- size_t pool_size, gfp_t gfp)
+static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
+ gfp_t gfp)
{
- const unsigned int order = get_order(pool_size);
const unsigned long nr_pages = pool_size >> PAGE_SHIFT;
+ const unsigned int order = get_order(pool_size);
struct page *page;
void *addr;
- int ret;
+ int ret = -ENOMEM;
if (dev_get_cma_area(NULL))
page = dma_alloc_from_contiguous(NULL, nr_pages, order, false);
@@ -131,38 +135,67 @@ static int __init __dma_atomic_pool_init(struct gen_pool **pool,
arch_dma_prep_coherent(page, pool_size);
- *pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!*pool)
- goto free_page;
-
addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
- goto destroy_genpool;
+ goto free_page;
- ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
- pool_size, -1);
+ ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
+ pool_size, NUMA_NO_NODE);
if (ret)
goto remove_mapping;
- gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
- pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
- pool_size >> 10, &gfp);
return 0;
remove_mapping:
dma_common_free_remap(addr, pool_size);
-destroy_genpool:
- gen_pool_destroy(*pool);
- *pool = NULL;
free_page:
if (!dma_release_from_contiguous(NULL, page, nr_pages))
__free_pages(page, order);
out:
- pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
- atomic_pool_size >> 10, &gfp);
- return -ENOMEM;
+ return ret;
+}
+
+static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
+{
+ if (pool && gen_pool_avail(pool) < atomic_pool_size)
+ atomic_pool_expand(pool, gen_pool_size(pool), gfp);
+}
+
+static void atomic_pool_work_fn(struct work_struct *work)
+{
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ atomic_pool_resize(atomic_pool, GFP_DMA);
+ if (IS_ENABLED(CONFIG_ZONE_DMA32))
+ atomic_pool_resize(atomic_pool_dma32, GFP_DMA32);
+ atomic_pool_resize(atomic_pool_normal, GFP_KERNEL);
+}
+
+static int __init __dma_atomic_pool_init(struct gen_pool **pool,
+ size_t pool_size, gfp_t gfp)
+{
+ int ret;
+
+ *pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
+ if (!*pool)
+ return -ENOMEM;
+
+ gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
+
+ ret = atomic_pool_expand(*pool, pool_size, gfp);
+ if (ret) {
+ gen_pool_destroy(*pool);
+ *pool = NULL;
+ pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
+ atomic_pool_size >> 10, &gfp);
+ return ret;
+ }
+
+
+ pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
+ pool_size >> 10, &gfp);
+ return 0;
}
static int __init dma_atomic_pool_init(void)
@@ -170,6 +203,8 @@ static int __init dma_atomic_pool_init(void)
int ret = 0;
int err;
+ INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
+
ret = __dma_atomic_pool_init(&atomic_pool_normal, atomic_pool_size,
GFP_KERNEL);
if (IS_ENABLED(CONFIG_ZONE_DMA)) {
@@ -231,6 +266,8 @@ void *dma_alloc_from_pool(struct device *dev, size_t size,
ptr = (void *)val;
memset(ptr, 0, size);
}
+ if (gen_pool_avail(pool) < atomic_pool_size)
+ schedule_work(&atomic_pool_work);
return ptr;
}
When AMD memory encryption is enabled, all non-blocking DMA allocations
must originate from the atomic pools depending on the device and the gfp
mask of the allocation.
Keep all memory in these pools unencrypted.
Signed-off-by: David Rientjes <[email protected]>
---
arch/x86/Kconfig | 1 +
kernel/dma/direct.c | 9 ++++-----
kernel/dma/remap.c | 2 ++
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1523,6 +1523,7 @@ config X86_CPA_STATISTICS
config AMD_MEM_ENCRYPT
bool "AMD Secure Memory Encryption (SME) support"
depends on X86_64 && CPU_SUP_AMD
+ select DMA_DIRECT_REMAP
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -125,7 +125,6 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
void *ret;
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs) &&
!gfpflags_allow_blocking(gfp)) {
ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
if (!ret)
@@ -202,6 +201,10 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
{
unsigned int page_order = get_order(size);
+ if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
+ return;
+
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
@@ -209,10 +212,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
return;
}
- if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
- return;
-
if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -8,6 +8,7 @@
#include <linux/dma-contiguous.h>
#include <linux/init.h>
#include <linux/genalloc.h>
+#include <linux/set_memory.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -141,6 +142,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
if (!addr)
goto free_page;
+ set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
pool_size, NUMA_NO_NODE);
if (ret)
When AMD memory encryption is enabled, some devices may use more than
256KB/sec from the atomic pools. Double the default size to make the
original size and expansion more appropriate.
This provides a slight optimization on initial expansion and is deemed
appropriate for all configs with CONFIG_DMA_REMAP enabled because of the
increased reliance on the atomic pools.
Alternatively, this could be done only when CONFIG_AMD_MEM_ENCRYPT is
enabled.
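A minimal sketch of that alternative, should we go that route (illustrative
only, not part of this patch):

/* Only raise the default when SEV may force use of the atomic pools */
#ifdef CONFIG_AMD_MEM_ENCRYPT
#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_512K
#else
#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
#endif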
Signed-off-by: David Rientjes <[email protected]>
---
kernel/dma/remap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -105,7 +105,7 @@ static struct gen_pool *atomic_pool __ro_after_init;
static struct gen_pool *atomic_pool_dma32 __ro_after_init;
static struct gen_pool *atomic_pool_normal __ro_after_init;
-#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_512K
static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
/* Dynamic background expansion when the atomic pool is near capacity */
When allocating non-blockable memory, determine the optimal gfp mask of
the device and use the appropriate atomic pool.
The coherent DMA mask will remain the same between allocation and free
and, thus, memory will be freed to the same atomic pool it was allocated
from.
Signed-off-by: David Rientjes <[email protected]>
---
include/linux/dma-direct.h | 2 ++
kernel/dma/direct.c | 6 +++---
kernel/dma/remap.c | 34 ++++++++++++++++++++++++++--------
3 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -67,6 +67,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size,
}
u64 dma_direct_get_required_mask(struct device *dev);
+gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
+ u64 *phys_mask);
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs);
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -44,8 +44,8 @@ u64 dma_direct_get_required_mask(struct device *dev)
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
-static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
- u64 *phys_limit)
+gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
+ u64 *phys_limit)
{
u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
@@ -88,7 +88,7 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
/* we always manually zero the memory once we are done: */
gfp &= ~__GFP_ZERO;
- gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+ gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
page = dma_alloc_contiguous(dev, alloc_size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -188,28 +188,44 @@ static int __init dma_atomic_pool_init(void)
}
postcore_initcall(dma_atomic_pool_init);
+static inline struct gen_pool *dev_to_pool(struct device *dev)
+{
+ u64 phys_mask;
+ gfp_t gfp;
+
+ gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+ &phys_mask);
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
+ return atomic_pool;
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
+ return atomic_pool_dma32;
+ return atomic_pool_normal;
+}
+
static bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
{
- if (unlikely(!atomic_pool))
- return false;
+ struct gen_pool *pool = dev_to_pool(dev);
- return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
+ if (unlikely(!pool))
+ return false;
+ return gen_pool_has_addr(pool, (unsigned long)start, size);
}
void *dma_alloc_from_pool(struct device *dev, size_t size,
struct page **ret_page, gfp_t flags)
{
+ struct gen_pool *pool = dev_to_pool(dev);
unsigned long val;
void *ptr = NULL;
- if (!atomic_pool) {
- WARN(1, "coherent pool not initialised!\n");
+ if (!pool) {
+ WARN(1, "%pGg atomic pool not initialised!\n", &flags);
return NULL;
}
- val = gen_pool_alloc(atomic_pool, size);
+ val = gen_pool_alloc(pool, size);
if (val) {
- phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+ phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
*ret_page = pfn_to_page(__phys_to_pfn(phys));
ptr = (void *)val;
@@ -221,9 +237,11 @@ void *dma_alloc_from_pool(struct device *dev, size_t size,
bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{
+ struct gen_pool *pool = dev_to_pool(dev);
+
if (!dma_in_atomic_pool(dev, start, size))
return false;
- gen_pool_free(atomic_pool, (unsigned long)start, size);
+ gen_pool_free(pool, (unsigned long)start, size);
return true;
}
#endif /* CONFIG_DMA_DIRECT_REMAP */
On Sun, 1 Mar 2020, David Rientjes wrote:
> When an atomic pool becomes fully depleted because it is now relied upon
> for all non-blocking allocations through the DMA API, allow background
> expansion of each pool by a kworker.
>
> When an atomic pool has less than the default size of memory left, kick
> off a kworker to dynamically expand the pool in the background. The pool
> is doubled in size.
>
> This allows the default size to be kept quite low when one or more of the
> atomic pools is not used.
>
> Also switch over some node ids to the more appropriate NUMA_NO_NODE.
>
> Signed-off-by: David Rientjes <[email protected]>
> ---
> kernel/dma/remap.c | 79 ++++++++++++++++++++++++++++++++++------------
> 1 file changed, 58 insertions(+), 21 deletions(-)
>
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -10,6 +10,7 @@
> #include <linux/genalloc.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> +#include <linux/workqueue.h>
>
> struct page **dma_common_find_pages(void *cpu_addr)
> {
> @@ -104,7 +105,10 @@ static struct gen_pool *atomic_pool_dma32 __ro_after_init;
> static struct gen_pool *atomic_pool_normal __ro_after_init;
>
> #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> -static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> +static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
> +
> +/* Dynamic background expansion when the atomic pool is near capacity */
> +struct work_struct atomic_pool_work;
>
> static int __init early_coherent_pool(char *p)
> {
> @@ -113,14 +117,14 @@ static int __init early_coherent_pool(char *p)
> }
> early_param("coherent_pool", early_coherent_pool);
>
> -static int __init __dma_atomic_pool_init(struct gen_pool **pool,
> - size_t pool_size, gfp_t gfp)
> +static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
> + gfp_t gfp)
> {
> - const unsigned int order = get_order(pool_size);
> const unsigned long nr_pages = pool_size >> PAGE_SHIFT;
> + const unsigned int order = get_order(pool_size);
> struct page *page;
> void *addr;
> - int ret;
> + int ret = -ENOMEM;
>
> if (dev_get_cma_area(NULL))
> page = dma_alloc_from_contiguous(NULL, nr_pages, order, false);
There's an issue here if the pool grows too large, since that would result in
order > MAX_ORDER-1. We can fix that by limiting order to MAX_ORDER-1 and
computing nr_pages = 1 << order.
I should also add support for trying smaller page allocations if our
preferred expansion size results in an allocation failure.
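For reference, a minimal sketch of the cap-and-retry idea (the helper name
and exact structure are illustrative, not the refreshed patch):

/*
 * Cap the expansion at MAX_ORDER-1 and retry with smaller orders if the
 * preferred allocation size fails; the caller then recomputes pool_size
 * as PAGE_SIZE << order and nr_pages as 1 << order.
 */
static struct page *atomic_pool_alloc_pages(gfp_t gfp, size_t pool_size,
					    unsigned int *ret_order)
{
	int order = min(get_order(pool_size), MAX_ORDER - 1);
	struct page *page;

	do {
		page = alloc_pages(gfp, order);
	} while (!page && order-- > 0);

	if (page)
		*ret_order = order;
	return page;
}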
Other than that, I'll remove the RFC tag and send a refreshed series by
the end of the week unless there are other comments or suggestions to
factor in.
Thanks!
> @@ -131,38 +135,67 @@ static int __init __dma_atomic_pool_init(struct gen_pool **pool,
>
> arch_dma_prep_coherent(page, pool_size);
>
> - *pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!*pool)
> - goto free_page;
> -
> addr = dma_common_contiguous_remap(page, pool_size,
> pgprot_dmacoherent(PAGE_KERNEL),
> __builtin_return_address(0));
> if (!addr)
> - goto destroy_genpool;
> + goto free_page;
>
> - ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
> - pool_size, -1);
> + ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
> + pool_size, NUMA_NO_NODE);
> if (ret)
> goto remove_mapping;
> - gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
>
> - pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
> - pool_size >> 10, &gfp);
> return 0;
>
> remove_mapping:
> dma_common_free_remap(addr, pool_size);
> -destroy_genpool:
> - gen_pool_destroy(*pool);
> - *pool = NULL;
> free_page:
> if (!dma_release_from_contiguous(NULL, page, nr_pages))
> __free_pages(page, order);
> out:
> - pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
> - atomic_pool_size >> 10, &gfp);
> - return -ENOMEM;
> + return ret;
> +}
> +
> +static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
> +{
> + if (pool && gen_pool_avail(pool) < atomic_pool_size)
> + atomic_pool_expand(pool, gen_pool_size(pool), gfp);
> +}
> +
> +static void atomic_pool_work_fn(struct work_struct *work)
> +{
> + if (IS_ENABLED(CONFIG_ZONE_DMA))
> + atomic_pool_resize(atomic_pool, GFP_DMA);
> + if (IS_ENABLED(CONFIG_ZONE_DMA32))
> + atomic_pool_resize(atomic_pool_dma32, GFP_DMA32);
> + atomic_pool_resize(atomic_pool_normal, GFP_KERNEL);
> +}
> +
> +static int __init __dma_atomic_pool_init(struct gen_pool **pool,
> + size_t pool_size, gfp_t gfp)
> +{
> + int ret;
> +
> + *pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
> + if (!*pool)
> + return -ENOMEM;
> +
> + gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
> +
> + ret = atomic_pool_expand(*pool, pool_size, gfp);
> + if (ret) {
> + gen_pool_destroy(*pool);
> + *pool = NULL;
> + pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
> + atomic_pool_size >> 10, &gfp);
> + return ret;
> + }
> +
> +
> + pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
> + pool_size >> 10, &gfp);
> + return 0;
> }
>
> static int __init dma_atomic_pool_init(void)
> @@ -170,6 +203,8 @@ static int __init dma_atomic_pool_init(void)
> int ret = 0;
> int err;
>
> + INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
> +
> ret = __dma_atomic_pool_init(&atomic_pool_normal, atomic_pool_size,
> GFP_KERNEL);
> if (IS_ENABLED(CONFIG_ZONE_DMA)) {
> @@ -231,6 +266,8 @@ void *dma_alloc_from_pool(struct device *dev, size_t size,
> ptr = (void *)val;
> memset(ptr, 0, size);
> }
> + if (gen_pool_avail(pool) < atomic_pool_size)
> + schedule_work(&atomic_pool_work);
>
> return ptr;
> }
>
On Sun, Mar 01, 2020 at 04:05:13PM -0800, David Rientjes wrote:
> The single atomic pool is allocated from the lowest zone possible since
> it is guaranteed to be applicable for any DMA allocation.
>
> Devices may allocate through the DMA API but not have a strict reliance
> on GFP_DMA memory. Since the atomic pool will be used for all
> non-blockable allocations, returning all memory from ZONE_DMA may
> unnecessarily deplete the zone.
>
> Provision for multiple atomic pools that will map to the optimal gfp
> mask of the device. These will be wired up in a subsequent patch.
>
> Signed-off-by: David Rientjes <[email protected]>
> ---
> kernel/dma/remap.c | 75 +++++++++++++++++++++++++++-------------------
> 1 file changed, 45 insertions(+), 30 deletions(-)
>
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -100,6 +100,8 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
>
> #ifdef CONFIG_DMA_DIRECT_REMAP
> static struct gen_pool *atomic_pool __ro_after_init;
> +static struct gen_pool *atomic_pool_dma32 __ro_after_init;
> +static struct gen_pool *atomic_pool_normal __ro_after_init;
Maybe rename atomic_pool as well as it really kinda looks like the
default at the moment?
>
> #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> @@ -111,66 +113,79 @@ static int __init early_coherent_pool(char *p)
> }
> early_param("coherent_pool", early_coherent_pool);
>
> -static gfp_t dma_atomic_pool_gfp(void)
> +static int __init __dma_atomic_pool_init(struct gen_pool **pool,
> + size_t pool_size, gfp_t gfp)
> {
Can this just return the pool and return NULL (or an ERR_PTR) on
failure?
On Sun, Mar 01, 2020 at 04:05:16PM -0800, David Rientjes wrote:
>
> When allocating non-blockable memory, determine the optimal gfp mask of
> the device and use the appropriate atomic pool.
>
> The coherent DMA mask will remain the same between allocation and free
> and, thus, memory will be freed to the same atomic pool it was allocated
> from.
I think this should go into the previous patch, as it is rather pointless
without the changes in this one.
On Sun, Mar 01, 2020 at 04:05:23PM -0800, David Rientjes wrote:
> When AMD memory encryption is enabled, all non-blocking DMA allocations
> must originate from the atomic pools depending on the device and the gfp
> mask of the allocation.
>
> Keep all memory in these pools unencrypted.
>
> Signed-off-by: David Rientjes <[email protected]>
> ---
> arch/x86/Kconfig | 1 +
> kernel/dma/direct.c | 9 ++++-----
> kernel/dma/remap.c | 2 ++
> 3 files changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1523,6 +1523,7 @@ config X86_CPA_STATISTICS
> config AMD_MEM_ENCRYPT
> bool "AMD Secure Memory Encryption (SME) support"
> depends on X86_64 && CPU_SUP_AMD
> + select DMA_DIRECT_REMAP
I think we need to split the pool from remapping so that we don't drag
in the remap code for x86.
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> - dma_alloc_need_uncached(dev, attrs) &&
We still need a check here for either uncached or memory encryption.
> @@ -141,6 +142,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
> if (!addr)
> goto free_page;
>
> + set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
This probably warrants a comment.
Also I think the infrastructure changes should be split from the x86
wire up.
On Sun, Mar 01, 2020 at 04:05:27PM -0800, David Rientjes wrote:
> When AMD memory encryption is enabled, some devices may used more than
> 256KB/sec from the atomic pools. Double the default size to make the
> original size and expansion more appropriate.
>
> This provides a slight optimization on initial expansion and is deemed
> appropriate for all configs with CONFIG_DMA_REMAP enabled because of the
> increased reliance on the atomic pools.
>
> Alternatively, this could be done only when CONFIG_AMD_MEM_ENCRYPT is
> enabled.
Can we just scale the initial size based on the system memory size?
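For example, something like this (purely illustrative numbers and helper
name, assuming the usual includes in the pool code): roughly 128 KiB of pool
per 1 GiB of system RAM, clamped to a single MAX_ORDER-1 allocation:

static void __init dma_atomic_pool_scale_size(void)
{
	unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K);

	/* keep each pool small enough for one MAX_ORDER-1 allocation */
	pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES);
	atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K);
}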
On Thu, 5 Mar 2020, Christoph Hellwig wrote:
> On Sun, Mar 01, 2020 at 04:05:23PM -0800, David Rientjes wrote:
> > When AMD memory encryption is enabled, all non-blocking DMA allocations
> > must originate from the atomic pools depending on the device and the gfp
> > mask of the allocation.
> >
> > Keep all memory in these pools unencrypted.
> >
> > Signed-off-by: David Rientjes <[email protected]>
> > ---
> > arch/x86/Kconfig | 1 +
> > kernel/dma/direct.c | 9 ++++-----
> > kernel/dma/remap.c | 2 ++
> > 3 files changed, 7 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -1523,6 +1523,7 @@ config X86_CPA_STATISTICS
> > config AMD_MEM_ENCRYPT
> > bool "AMD Secure Memory Encryption (SME) support"
> > depends on X86_64 && CPU_SUP_AMD
> > + select DMA_DIRECT_REMAP
>
> I think we need to split the pool from remapping so that we don't drag
> in the remap code for x86.
>
Thanks for the review, Christoph. I can address all the comments that you
provided for the series but am hoping to get a clarification on this one
depending on how elaborate the change you would prefer.
As a preliminary change to this series, I could move the atomic pools and
the coherent_pool command line to a new kernel/dma/atomic_pools.c file,
guarded by a new CONFIG_DMA_ATOMIC_POOLS that would get "select"ed by
CONFIG_DMA_REMAP and CONFIG_AMD_MEM_ENCRYPT, and call into
dma_common_contiguous_remap(), if we have CONFIG_DMA_DIRECT_REMAP, when
adding pages to the pool.
I think that's what you mean by splitting the pool from remapping,
otherwise we still have a full CONFIG_DMA_REMAP dependency here. If you
had something else in mind, please let me know. Thanks!
> > if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > - dma_alloc_need_uncached(dev, attrs) &&
>
> We still need a check here for either uncached or memory encryption.
>
> > @@ -141,6 +142,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
> > if (!addr)
> > goto free_page;
> >
> > + set_memory_decrypted((unsigned long)page_to_virt(page), nr_pages);
>
> This probably warrants a comment.
>
> Also I think the infrastructure changes should be split from the x86
> wire up.
>
On Fri, Mar 06, 2020 at 04:36:07PM -0800, David Rientjes wrote:
> As a preliminary change to this series, I could move the atomic pools and
> coherent_pool command line to a new kernel/dma/atomic_pools.c file with a
> new CONFIG_DMA_ATOMIC_POOLS that would get "select"ed by CONFIG_DMA_REMAP
> and CONFIG_AMD_MEM_ENCRYPT and call into dma_common_contiguous_remap() if
> we have CONFIG_DMA_DIRECT_REMAP when adding pages to the pool.
Yes. Although I'd just name it kernel/dma/pool.c and
CONFIG_DMA_COHERENT_POOL or so, as I plan to reuse the code for
architectures that just preallocate all coherent memory at boot time
as well.
> I think that's what you mean by splitting the pool from remapping,
> otherwise we still have a full CONFIG_DMA_REMAP dependency here. If you
> had something else in mind, please let me know. Thanks!
Yes, that is exactly what I meant.
set_memory_decrypted() may block, so it is not possible to do non-blocking
allocations through the DMA API for devices that require unencrypted
memory.
The solution is to provide atomic DMA pools for the various possible gfp
requirements as a means to prevent unnecessary depletion of lowmem.
These atomic pools are separated from the remap code and can be selected
for configurations that need them outside the scope of
CONFIG_DMA_DIRECT_REMAP, such as CONFIG_AMD_MEM_ENCRYPT.
These atomic DMA pools are kept unencrypted so they can immediately be
used for non-blocking allocations. Since the need for this type of memory
depends on the kernel config and devices being used, these pools are also
dynamically expandable.
There are a number of changes to v1 of the patchset based on Christoph's
feedback and the separation of the atomic pools out into the new
kernel/dma/pool.c.
NOTE! I'd like eyes from Christoph specifically on patch 4 in the series,
where we handle the coherent pools differently depending on
CONFIG_DMA_COHERENT_POOL and/or CONFIG_DMA_DIRECT_REMAP, and from Thomas
on the requirement for set_memory_decrypted() for all DMA coherent pools.
Why still an RFC? We are starting to aggressively test this series, but
since a number of changes were requested for the first RFC, it is better to
get feedback and factor it into the series earlier rather than later so
testing can focus on a series that could be merged upstream.
This patchset is based on latest Linus HEAD:
commit ae46d2aa6a7fbe8ca0946f24b061b6ccdc6c3f25
Author: Hillf Danton <[email protected]>
Date: Wed Apr 8 11:59:24 2020 -0400
mm/gup: Let __get_user_pages_locked() return -EINTR for fatal signal
---
arch/x86/Kconfig | 1 +
drivers/iommu/dma-iommu.c | 5 +-
include/linux/dma-direct.h | 2 +
include/linux/dma-mapping.h | 6 +-
kernel/dma/Kconfig | 6 +-
kernel/dma/Makefile | 1 +
kernel/dma/direct.c | 28 ++++-
kernel/dma/pool.c | 224 ++++++++++++++++++++++++++++++++++++
kernel/dma/remap.c | 114 ------------------
9 files changed, 261 insertions(+), 126 deletions(-)
create mode 100644 kernel/dma/pool.c
DMA atomic pools will be needed beyond only CONFIG_DMA_DIRECT_REMAP so
separate them out into their own file.
This also adds a new Kconfig option that can be subsequently used for
options, such as CONFIG_AMD_MEM_ENCRYPT, that will utilize the coherent
pools but do not have a dependency on direct remapping.
For this patch alone, there is no functional change introduced.
Signed-off-by: David Rientjes <[email protected]>
---
kernel/dma/Kconfig | 6 ++-
kernel/dma/Makefile | 1 +
kernel/dma/pool.c | 125 ++++++++++++++++++++++++++++++++++++++++++++
kernel/dma/remap.c | 114 ----------------------------------------
4 files changed, 131 insertions(+), 115 deletions(-)
create mode 100644 kernel/dma/pool.c
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 4c103a24e380..d006668c0027 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -79,10 +79,14 @@ config DMA_REMAP
select DMA_NONCOHERENT_MMAP
bool
-config DMA_DIRECT_REMAP
+config DMA_COHERENT_POOL
bool
select DMA_REMAP
+config DMA_DIRECT_REMAP
+ bool
+ select DMA_COHERENT_POOL
+
config DMA_CMA
bool "DMA Contiguous Memory Allocator"
depends on HAVE_DMA_CONTIGUOUS && CMA
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index d237cf3dc181..370f63344e9c 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
obj-$(CONFIG_DMA_REMAP) += remap.o
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
new file mode 100644
index 000000000000..64cbe5852cf6
--- /dev/null
+++ b/kernel/dma/pool.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2014 The Linux Foundation
+ * Copyright (C) 2020 Google LLC
+ */
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/dma-contiguous.h>
+#include <linux/init.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+static struct gen_pool *atomic_pool __ro_after_init;
+
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+static int __init early_coherent_pool(char *p)
+{
+ atomic_pool_size = memparse(p, &p);
+ return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+static gfp_t dma_atomic_pool_gfp(void)
+{
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ return GFP_DMA;
+ if (IS_ENABLED(CONFIG_ZONE_DMA32))
+ return GFP_DMA32;
+ return GFP_KERNEL;
+}
+
+static int __init dma_atomic_pool_init(void)
+{
+ unsigned int pool_size_order = get_order(atomic_pool_size);
+ unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+ struct page *page;
+ void *addr;
+ int ret;
+
+ if (dev_get_cma_area(NULL))
+ page = dma_alloc_from_contiguous(NULL, nr_pages,
+ pool_size_order, false);
+ else
+ page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
+ if (!page)
+ goto out;
+
+ arch_dma_prep_coherent(page, atomic_pool_size);
+
+ atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!atomic_pool)
+ goto free_page;
+
+ addr = dma_common_contiguous_remap(page, atomic_pool_size,
+ pgprot_dmacoherent(PAGE_KERNEL),
+ __builtin_return_address(0));
+ if (!addr)
+ goto destroy_genpool;
+
+ ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
+ page_to_phys(page), atomic_pool_size, -1);
+ if (ret)
+ goto remove_mapping;
+ gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
+
+ pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
+ atomic_pool_size / 1024);
+ return 0;
+
+remove_mapping:
+ dma_common_free_remap(addr, atomic_pool_size);
+destroy_genpool:
+ gen_pool_destroy(atomic_pool);
+ atomic_pool = NULL;
+free_page:
+ if (!dma_release_from_contiguous(NULL, page, nr_pages))
+ __free_pages(page, pool_size_order);
+out:
+ pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
+ atomic_pool_size / 1024);
+ return -ENOMEM;
+}
+postcore_initcall(dma_atomic_pool_init);
+
+bool dma_in_atomic_pool(void *start, size_t size)
+{
+ if (unlikely(!atomic_pool))
+ return false;
+
+ return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
+}
+
+void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
+{
+ unsigned long val;
+ void *ptr = NULL;
+
+ if (!atomic_pool) {
+ WARN(1, "coherent pool not initialised!\n");
+ return NULL;
+ }
+
+ val = gen_pool_alloc(atomic_pool, size);
+ if (val) {
+ phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+
+ *ret_page = pfn_to_page(__phys_to_pfn(phys));
+ ptr = (void *)val;
+ memset(ptr, 0, size);
+ }
+
+ return ptr;
+}
+
+bool dma_free_from_pool(void *start, size_t size)
+{
+ if (!dma_in_atomic_pool(start, size))
+ return false;
+ gen_pool_free(atomic_pool, (unsigned long)start, size);
+ return true;
+}
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d14cbc83986a..081486007342 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -97,117 +97,3 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
vunmap(cpu_addr);
}
-
-#ifdef CONFIG_DMA_DIRECT_REMAP
-static struct gen_pool *atomic_pool __ro_after_init;
-
-#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
-static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
-
-static int __init early_coherent_pool(char *p)
-{
- atomic_pool_size = memparse(p, &p);
- return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
-static gfp_t dma_atomic_pool_gfp(void)
-{
- if (IS_ENABLED(CONFIG_ZONE_DMA))
- return GFP_DMA;
- if (IS_ENABLED(CONFIG_ZONE_DMA32))
- return GFP_DMA32;
- return GFP_KERNEL;
-}
-
-static int __init dma_atomic_pool_init(void)
-{
- unsigned int pool_size_order = get_order(atomic_pool_size);
- unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
- struct page *page;
- void *addr;
- int ret;
-
- if (dev_get_cma_area(NULL))
- page = dma_alloc_from_contiguous(NULL, nr_pages,
- pool_size_order, false);
- else
- page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
- if (!page)
- goto out;
-
- arch_dma_prep_coherent(page, atomic_pool_size);
-
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
- goto free_page;
-
- addr = dma_common_contiguous_remap(page, atomic_pool_size,
- pgprot_dmacoherent(PAGE_KERNEL),
- __builtin_return_address(0));
- if (!addr)
- goto destroy_genpool;
-
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
- page_to_phys(page), atomic_pool_size, -1);
- if (ret)
- goto remove_mapping;
- gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
-
- pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
- atomic_pool_size / 1024);
- return 0;
-
-remove_mapping:
- dma_common_free_remap(addr, atomic_pool_size);
-destroy_genpool:
- gen_pool_destroy(atomic_pool);
- atomic_pool = NULL;
-free_page:
- if (!dma_release_from_contiguous(NULL, page, nr_pages))
- __free_pages(page, pool_size_order);
-out:
- pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
- atomic_pool_size / 1024);
- return -ENOMEM;
-}
-postcore_initcall(dma_atomic_pool_init);
-
-bool dma_in_atomic_pool(void *start, size_t size)
-{
- if (unlikely(!atomic_pool))
- return false;
-
- return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
-}
-
-void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
-{
- unsigned long val;
- void *ptr = NULL;
-
- if (!atomic_pool) {
- WARN(1, "coherent pool not initialised!\n");
- return NULL;
- }
-
- val = gen_pool_alloc(atomic_pool, size);
- if (val) {
- phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
-
- *ret_page = pfn_to_page(__phys_to_pfn(phys));
- ptr = (void *)val;
- memset(ptr, 0, size);
- }
-
- return ptr;
-}
-
-bool dma_free_from_pool(void *start, size_t size)
-{
- if (!dma_in_atomic_pool(start, size))
- return false;
- gen_pool_free(atomic_pool, (unsigned long)start, size);
- return true;
-}
-#endif /* CONFIG_DMA_DIRECT_REMAP */
When a device required unencrypted memory and the context does not allow
blocking, memory must be returned from the atomic coherent pools.
This avoids the remap when CONFIG_DMA_DIRECT_REMAP is not enabled and the
config only requires CONFIG_DMA_COHERENT_POOL. This will be used for
CONFIG_AMD_MEM_ENCRYPT in a subsequent patch.
Keep all memory in these pools unencrypted.
Signed-off-by: David Rientjes <[email protected]>
---
kernel/dma/direct.c | 16 ++++++++++++++++
kernel/dma/pool.c | 15 +++++++++++++--
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 70800ca64f13..44165263c185 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -124,6 +124,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
struct page *page;
void *ret;
+ /*
+ * Unencrypted memory must come directly from DMA atomic pools if
+ * blocking is not allowed.
+ */
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) {
+ ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
+ if (!ret)
+ return NULL;
+ goto done;
+ }
+
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_alloc_need_uncached(dev, attrs) &&
!gfpflags_allow_blocking(gfp)) {
@@ -203,6 +215,10 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
{
unsigned int page_order = get_order(size);
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
+ return;
+
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index e14c5a2da734..6685ab89cfa7 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -9,6 +9,7 @@
#include <linux/dma-contiguous.h>
#include <linux/init.h>
#include <linux/genalloc.h>
+#include <linux/set_memory.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -55,12 +56,20 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
arch_dma_prep_coherent(page, pool_size);
+#ifdef CONFIG_DMA_DIRECT_REMAP
addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
goto free_page;
-
+#else
+ addr = page_to_virt(page);
+#endif
+ /*
+ * Memory in the atomic DMA pools must be unencrypted, the pools do not
+ * shrink so no re-encryption occurs in dma_direct_free_pages().
+ */
+ set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order);
ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
pool_size, NUMA_NO_NODE);
if (ret)
@@ -69,8 +78,10 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
return 0;
remove_mapping:
+#ifdef CONFIG_DMA_DIRECT_REMAP
dma_common_free_remap(addr, pool_size);
-free_page:
+#endif
+free_page: __maybe_unused
if (!dma_release_from_contiguous(NULL, page, 1 << order))
__free_pages(page, order);
out:
On 4/8/20 4:21 PM, David Rientjes wrote:
> When a device required unencrypted memory and the context does not allow
required => requires
> blocking, memory must be returned from the atomic coherent pools.
>
> This avoids the remap when CONFIG_DMA_DIRECT_REMAP is not enabled and the
> config only requires CONFIG_DMA_COHERENT_POOL. This will be used for
> CONFIG_AMD_MEM_ENCRYPT in a subsequent patch.
>
> Keep all memory in these pools unencrypted.
>
> Signed-off-by: David Rientjes <[email protected]>
> ---
> kernel/dma/direct.c | 16 ++++++++++++++++
> kernel/dma/pool.c | 15 +++++++++++++--
> 2 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 70800ca64f13..44165263c185 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -124,6 +124,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> struct page *page;
> void *ret;
>
> + /*
> + * Unencrypted memory must come directly from DMA atomic pools if
> + * blocking is not allowed.
> + */
> + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) {
> + ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
> + if (!ret)
> + return NULL;
> + goto done;
> + }
> +
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> dma_alloc_need_uncached(dev, attrs) &&
> !gfpflags_allow_blocking(gfp)) {
> @@ -203,6 +215,10 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> {
> unsigned int page_order = get_order(size);
>
> + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
> + return;
> +
> if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> !force_dma_unencrypted(dev)) {
> /* cpu_addr is a struct page cookie, not a kernel address */
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index e14c5a2da734..6685ab89cfa7 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -9,6 +9,7 @@
> #include <linux/dma-contiguous.h>
> #include <linux/init.h>
> #include <linux/genalloc.h>
> +#include <linux/set_memory.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/workqueue.h>
> @@ -55,12 +56,20 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
>
> arch_dma_prep_coherent(page, pool_size);
>
> +#ifdef CONFIG_DMA_DIRECT_REMAP
> addr = dma_common_contiguous_remap(page, pool_size,
> pgprot_dmacoherent(PAGE_KERNEL),
> __builtin_return_address(0));
> if (!addr)
> goto free_page;
> -
> +#else
> + addr = page_to_virt(page);
> +#endif
> + /*
> + * Memory in the atomic DMA pools must be unencrypted, the pools do not
> + * shrink so no re-encryption occurs in dma_direct_free_pages().
> + */
> + set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order);
> ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
> pool_size, NUMA_NO_NODE);
> if (ret)
> @@ -69,8 +78,10 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
> return 0;
>
> remove_mapping:
> +#ifdef CONFIG_DMA_DIRECT_REMAP
> dma_common_free_remap(addr, pool_size);
You're about to free the memory, but you've called set_memory_decrypted()
against it, so you need to do a set_memory_encrypted() to bring it back to
a state ready for allocation again.
Thanks,
Tom
> -free_page:
> +#endif
> +free_page: __maybe_unused
> if (!dma_release_from_contiguous(NULL, page, 1 << order))
> __free_pages(page, order);
> out:
>
> + /*
> + * Unencrypted memory must come directly from DMA atomic pools if
> + * blocking is not allowed.
> + */
> + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) {
> + ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
> + if (!ret)
> + return NULL;
> + goto done;
> + }
> +
> if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> dma_alloc_need_uncached(dev, attrs) &&
> !gfpflags_allow_blocking(gfp)) {
Can we keep a single conditional for the pool allocations? Maybe
add a new dma_alloc_from_pool helper ala:
static inline bool dma_alloc_from_pool(struct device *dev, gfp_t gfp)
{
if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
return false;
if (gfpflags_allow_blocking(gfp))
return false;
if (force_dma_unencrypted(dev))
return true;
if (dma_alloc_need_uncached(dev))
return true;
	return false;
}
> @@ -203,6 +215,10 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> {
> unsigned int page_order = get_order(size);
>
> + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
> + return;
> +
Similarly I think we should have a single conditional to free from the
pool instead.
On Wed, Apr 08, 2020 at 02:21:03PM -0700, David Rientjes wrote:
> +++ b/kernel/dma/pool.c
> @@ -0,0 +1,125 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2012 ARM Ltd.
> + * Copyright (c) 2014 The Linux Foundation
> + * Copyright (C) 2020 Google LLC
Of the old copyrights the LF ones apply to the remapping helpers,
that are stil in remap.c, so they don't need to be added here.
Otherwise this looks good:
Reviewed-by: Christoph Hellwig <[email protected]>
On Thu, 9 Apr 2020, Tom Lendacky wrote:
> > When a device required unencrypted memory and the context does not allow
>
> required => requires
>
Fixed, thanks.
> > blocking, memory must be returned from the atomic coherent pools.
> >
> > This avoids the remap when CONFIG_DMA_DIRECT_REMAP is not enabled and the
> > config only requires CONFIG_DMA_COHERENT_POOL. This will be used for
> > CONFIG_AMD_MEM_ENCRYPT in a subsequent patch.
> >
> > Keep all memory in these pools unencrypted.
> >
> > Signed-off-by: David Rientjes <[email protected]>
> > ---
> > kernel/dma/direct.c | 16 ++++++++++++++++
> > kernel/dma/pool.c | 15 +++++++++++++--
> > 2 files changed, 29 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> > index 70800ca64f13..44165263c185 100644
> > --- a/kernel/dma/direct.c
> > +++ b/kernel/dma/direct.c
> > @@ -124,6 +124,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t
> > size,
> > struct page *page;
> > void *ret;
> > + /*
> > + * Unencrypted memory must come directly from DMA atomic pools if
> > + * blocking is not allowed.
> > + */
> > + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> > + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) {
> > + ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
> > + if (!ret)
> > + return NULL;
> > + goto done;
> > + }
> > +
> > if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > dma_alloc_need_uncached(dev, attrs) &&
> > !gfpflags_allow_blocking(gfp)) {
> > @@ -203,6 +215,10 @@ void dma_direct_free_pages(struct device *dev, size_t
> > size, void *cpu_addr,
> > {
> > unsigned int page_order = get_order(size);
> > + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> > + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
> > + return;
> > +
> > if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> > !force_dma_unencrypted(dev)) {
> > /* cpu_addr is a struct page cookie, not a kernel address */
> > diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> > index e14c5a2da734..6685ab89cfa7 100644
> > --- a/kernel/dma/pool.c
> > +++ b/kernel/dma/pool.c
> > @@ -9,6 +9,7 @@
> > #include <linux/dma-contiguous.h>
> > #include <linux/init.h>
> > #include <linux/genalloc.h>
> > +#include <linux/set_memory.h>
> > #include <linux/slab.h>
> > #include <linux/vmalloc.h>
> > #include <linux/workqueue.h>
> > @@ -55,12 +56,20 @@ static int atomic_pool_expand(struct gen_pool *pool,
> > size_t pool_size,
> > arch_dma_prep_coherent(page, pool_size);
> > +#ifdef CONFIG_DMA_DIRECT_REMAP
> > addr = dma_common_contiguous_remap(page, pool_size,
> > pgprot_dmacoherent(PAGE_KERNEL),
> > __builtin_return_address(0));
> > if (!addr)
> > goto free_page;
> > -
> > +#else
> > + addr = page_to_virt(page);
> > +#endif
> > + /*
> > + * Memory in the atomic DMA pools must be unencrypted, the pools do
> > not
> > + * shrink so no re-encryption occurs in dma_direct_free_pages().
> > + */
> > + set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order);
> > ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
> > pool_size, NUMA_NO_NODE);
> > if (ret)
> > @@ -69,8 +78,10 @@ static int atomic_pool_expand(struct gen_pool *pool,
> > size_t pool_size,
> > return 0;
> > remove_mapping:
> > +#ifdef CONFIG_DMA_DIRECT_REMAP
> > dma_common_free_remap(addr, pool_size);
>
> You're about to free the memory, but you've called set_memory_decrypted()
> against it, so you need to do a set_memory_encrypted() to bring it back to a
> state ready for allocation again.
>
Ah, good catch, thanks. I notice that I should also be checking the
return value of set_memory_decrypted() because pages added to the coherent
pools *must* be unencrypted. If it fails, we fail the expansion.
And do the same for set_memory_encrypted(), which would be a bizarre
situation (decrypt succeeded, encrypt failed); handle it by simply leaking
the page.
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -7,6 +7,7 @@
#include <linux/dma-contiguous.h>
#include <linux/init.h>
#include <linux/genalloc.h>
+#include <linux/set_memory.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -53,22 +54,42 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
arch_dma_prep_coherent(page, pool_size);
+#ifdef CONFIG_DMA_DIRECT_REMAP
addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
goto free_page;
-
+#else
+ addr = page_to_virt(page);
+#endif
+ /*
+ * Memory in the atomic DMA pools must be unencrypted, the pools do not
+ * shrink so no re-encryption occurs in dma_direct_free_pages().
+ */
+ ret = set_memory_decrypted((unsigned long)page_to_virt(page),
+ 1 << order);
+ if (ret)
+ goto remove_mapping;
ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
pool_size, NUMA_NO_NODE);
if (ret)
- goto remove_mapping;
+ goto encrypt_mapping;
return 0;
+encrypt_mapping:
+ ret = set_memory_encrypted((unsigned long)page_to_virt(page),
+ 1 << order);
+ if (WARN_ON_ONCE(ret)) {
+ /* Decrypt succeeded but encrypt failed, purposely leak */
+ goto out;
+ }
remove_mapping:
+#ifdef CONFIG_DMA_DIRECT_REMAP
dma_common_free_remap(addr, pool_size);
-free_page:
+#endif
+free_page: __maybe_unused
if (!dma_release_from_contiguous(NULL, page, 1 << order))
__free_pages(page, order);
out:
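For reference, with the CONFIG_DMA_DIRECT_REMAP ifdefs and the remap details
dropped, the expansion path above brackets the gen_pool registration with the
decrypt/encrypt pair. The function below is only an illustrative sketch of
that control flow; the name and signature are made up and do not match
atomic_pool_expand() exactly:

/* Sketch only: condensed error handling of the expansion path above. */
static int atomic_pool_expand_sketch(struct gen_pool *pool, struct page *page,
                                     void *addr, size_t pool_size, int order)
{
        int ret;

        /* Pool memory must be unencrypted before it is handed out. */
        ret = set_memory_decrypted((unsigned long)page_to_virt(page),
                                   1 << order);
        if (ret)
                return ret;     /* nothing to undo yet, fail the expansion */

        ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
                                pool_size, NUMA_NO_NODE);
        if (!ret)
                return 0;

        /* Re-encrypt before freeing; if even that fails, leak the page. */
        if (WARN_ON_ONCE(set_memory_encrypted((unsigned long)page_to_virt(page),
                                              1 << order)))
                return ret;

        if (!dma_release_from_contiguous(NULL, page, 1 << order))
                __free_pages(page, order);
        return ret;
}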
On Tue, 14 Apr 2020, Christoph Hellwig wrote:
> > + /*
> > + * Unencrypted memory must come directly from DMA atomic pools if
> > + * blocking is not allowed.
> > + */
> > + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> > + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) {
> > + ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
> > + if (!ret)
> > + return NULL;
> > + goto done;
> > + }
> > +
> > if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > dma_alloc_need_uncached(dev, attrs) &&
> > !gfpflags_allow_blocking(gfp)) {
>
> Can we keep a single conditional for the pool allocations? Maybe
> add a new dma_alloc_from_pool helper ala:
>
> static inline bool dma_alloc_from_pool(struct device *dev, gfp_t gfp)
> {
> if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
> return false;
> if (gfpflags_allow_blocking(gfp))
> return false;
> if (force_dma_unencrypted(dev))
> return true;
> if (dma_alloc_need_uncached(dev))
> return true;
> }
Looks good, fixed. I renamed it to dma_should_alloc_from_pool() to avoid
confusing it with the actual allocation function and added a
dma_should_free_from_pool() as well.
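As background for the diff below, the case dma_should_alloc_from_pool() really
exists for is an atomic, unencrypted allocation on an SEV guest. Something
like the following driver-side call (device, size and variable names here are
placeholders, not from the patch) ends up in the pool path because GFP_ATOMIC
forbids blocking while force_dma_unencrypted() is true:

        /*
         * Hypothetical example: atomic coherent allocation on an SEV guest.
         * dma_direct_alloc_pages() cannot call set_memory_decrypted() here,
         * so the buffer must come from the preallocated atomic pool.
         */
        dma_addr_t dma_handle;
        void *buf;

        buf = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle, GFP_ATOMIC);
        if (buf) {
                /* ... program the device with dma_handle ... */
                dma_free_coherent(dev, PAGE_SIZE, buf, dma_handle);
        }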
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -75,6 +75,39 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
}
+/*
+ * Decrypting memory is allowed to block, so if this device requires
+ * unencrypted memory it must come from atomic pools.
+ */
+static inline bool dma_should_alloc_from_pool(struct device *dev, gfp_t gfp,
+ unsigned long attrs)
+{
+ if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
+ return false;
+ if (gfpflags_allow_blocking(gfp))
+ return false;
+ if (force_dma_unencrypted(dev))
+ return true;
+ if (!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP))
+ return false;
+ if (dma_alloc_need_uncached(dev, attrs))
+ return true;
+ return false;
+}
+
+static inline bool dma_should_free_from_pool(struct device *dev,
+ unsigned long attrs)
+{
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
+ return true;
+ if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
+ !force_dma_unencrypted(dev))
+ return false;
+ if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP))
+ return true;
+ return false;
+}
+
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp, unsigned long attrs)
{
@@ -124,9 +157,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
struct page *page;
void *ret;
- if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs) &&
- !gfpflags_allow_blocking(gfp)) {
+ if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
if (!ret)
return NULL;
@@ -202,6 +233,11 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
{
unsigned int page_order = get_order(size);
+ /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
+ if (dma_should_free_from_pool(dev, attrs) &&
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
+ return;
+
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
@@ -209,10 +245,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
return;
}
- if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
- return;
-
if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);