2022-11-21 20:41:06

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH] swiotlb: check set_memory_decrypted()'s return value

swiotlb_update_mem_attributes() is called on a system where decrypted
swiotlb bounce buffers are required. Panic the system if
set_memory_decrypted() fails.

When rmem_swiotlb_device_init() -> set_memory_decrypted() fails, let's
try to handle it gracefully.

Not sure how to handle the failure in swiotlb_init_late(). Add a WARN().

Signed-off-by: Dexuan Cui <[email protected]>
---
kernel/dma/swiotlb.c | 42 +++++++++++++++++++++++++++++-------------
1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 339a990554e7..8e26c09c625c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -248,12 +248,16 @@ void __init swiotlb_update_mem_attributes(void)
struct io_tlb_mem *mem = &io_tlb_default_mem;
void *vaddr;
unsigned long bytes;
+ int rc;

if (!mem->nslabs || mem->late_alloc)
return;
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
- set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+
+ rc = set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+ if (rc)
+ panic("Failed to decrypt swiotlb bounce buffers (%d)\n", rc);

mem->vaddr = swiotlb_mem_remap(mem, bytes);
if (!mem->vaddr)
@@ -447,8 +451,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
if (!mem->slots)
goto error_slots;

- set_memory_decrypted((unsigned long)vstart,
- (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ rc = set_memory_decrypted((unsigned long)vstart,
+ (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ WARN(rc, "Failed to decrypt swiotlb bounce buffers (%d)\n", rc);
+
swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
default_nareas);

@@ -986,6 +992,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,

/* Set Per-device io tlb area to one */
unsigned int nareas = 1;
+ int rc = -ENOMEM;

/*
* Since multiple devices can share the same pool, the private data,
@@ -998,21 +1005,22 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
return -ENOMEM;

mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
- if (!mem->slots) {
- kfree(mem);
- return -ENOMEM;
- }
+ if (!mem->slots)
+ goto free_mem;

mem->areas = kcalloc(nareas, sizeof(*mem->areas),
GFP_KERNEL);
- if (!mem->areas) {
- kfree(mem->slots);
- kfree(mem);
- return -ENOMEM;
+ if (!mem->areas)
+ goto free_slots;
+
+ rc = set_memory_decrypted(
+ (unsigned long)phys_to_virt(rmem->base),
+ rmem->size >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt rmem buffer (%d)\n", rc);
+ goto free_areas;
}

- set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
- rmem->size >> PAGE_SHIFT);
swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
false, nareas);
mem->for_alloc = true;
@@ -1025,6 +1033,14 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
dev->dma_io_tlb_mem = mem;

return 0;
+
+free_areas:
+ kfree(mem->areas);
+free_slots:
+ kfree(mem->slots);
+free_mem:
+ kfree(mem);
+ return rc;
}

static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
--
2.25.1



2022-11-21 21:26:50

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH] swiotlb: check set_memory_decrypted()'s return value

On 2022-11-21 19:48, Dexuan Cui wrote:
> swiotlb_update_mem_attributes() is called from a system where decrypted
> swiotlb bounce buffers are required. Panic the system if
> set_memory_decrypted() fails.
>
> When rmem_swiotlb_device_init() -> set_memory_decrypted(), let's try
> to handle it gracefully.
>
> Not sure how to handle the failure in swiotlb_init_late(). Add a WARN().
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> kernel/dma/swiotlb.c | 42 +++++++++++++++++++++++++++++-------------
> 1 file changed, 29 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 339a990554e7..8e26c09c625c 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -248,12 +248,16 @@ void __init swiotlb_update_mem_attributes(void)
> struct io_tlb_mem *mem = &io_tlb_default_mem;
> void *vaddr;
> unsigned long bytes;
> + int rc;
>
> if (!mem->nslabs || mem->late_alloc)
> return;
> vaddr = phys_to_virt(mem->start);
> bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
> - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> +
> + rc = set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> + if (rc)
> + panic("Failed to decrypt swiotlb bounce buffers (%d)\n", rc);

Aww, I just cleaned up all the panics! AFAICS we could warn and set
mem->nslabs to 0 here to semi-gracefully disable SWIOTLB (presumably
decryption failure is sufficiently unexpected that "leaking" the SWIOTLB
memory is the least of the system's problems). Or indeed just warn and
do nothing as in the swiotlb_init_late() case below - what's with the
inconsistency? In either path we have the same expectation that
decryption succeeds (or does nothing, successfully), so failure is no
more or less fatal to later SWIOTLB operation depending on when the
architecture chose to set it up.

> mem->vaddr = swiotlb_mem_remap(mem, bytes);
> if (!mem->vaddr)
> @@ -447,8 +451,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
> if (!mem->slots)
> goto error_slots;
>
> - set_memory_decrypted((unsigned long)vstart,
> - (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
> + rc = set_memory_decrypted((unsigned long)vstart,
> + (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
> + WARN(rc, "Failed to decrypt swiotlb bounce buffers (%d)\n", rc);
> +
> swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
> default_nareas);
>
> @@ -986,6 +992,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
>
> /* Set Per-device io tlb area to one */
> unsigned int nareas = 1;
> + int rc = -ENOMEM;
>
> /*
> * Since multiple devices can share the same pool, the private data,
> @@ -998,21 +1005,22 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
> return -ENOMEM;
>
> mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
> - if (!mem->slots) {
> - kfree(mem);
> - return -ENOMEM;
> - }
> + if (!mem->slots)
> + goto free_mem;
>
> mem->areas = kcalloc(nareas, sizeof(*mem->areas),
> GFP_KERNEL);
> - if (!mem->areas) {
> - kfree(mem->slots);
> - kfree(mem);
> - return -ENOMEM;
> + if (!mem->areas)
> + goto free_slots;
> +
> + rc = set_memory_decrypted(
> + (unsigned long)phys_to_virt(rmem->base),
> + rmem->size >> PAGE_SHIFT);
> + if (rc) {
> + pr_err("Failed to decrypt rmem buffer (%d)\n", rc);
> + goto free_areas;

So in 3 init paths we have 3 different outcomes from the same thing :(

1: make the whole system unusable
2: continue with possible data corruption (or at least weird DMA errors)
if devices still see encrypted memory contents
3: cleanly disable SWIOTLB, thus also all subsequent attempts to use it

(OK, for the rmem case this isn't actually 3 since falling back to
io_tlb_default_mem might work out more like 2, but hopefully you get my
point)

Thanks,
Robin.

> }
>
> - set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
> - rmem->size >> PAGE_SHIFT);
> swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
> false, nareas);
> mem->for_alloc = true;
> @@ -1025,6 +1033,14 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
> dev->dma_io_tlb_mem = mem;
>
> return 0;
> +
> +free_areas:
> + kfree(mem->areas);
> +free_slots:
> + kfree(mem->slots);
> +free_mem:
> + kfree(mem);
> + return rc;
> }
>
> static void rmem_swiotlb_device_release(struct reserved_mem *rmem,

2022-11-24 02:02:10

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH] swiotlb: check set_memory_decrypted()'s return value

> From: Robin Murphy <[email protected]>
> Sent: Monday, November 21, 2022 12:49 PM
> > [...]
> > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > @@ -248,12 +248,16 @@ void __init swiotlb_update_mem_attributes(void)
> > struct io_tlb_mem *mem = &io_tlb_default_mem;
> > void *vaddr;
> > unsigned long bytes;
> > + int rc;
> >
> > if (!mem->nslabs || mem->late_alloc)
> > return;
> > vaddr = phys_to_virt(mem->start);
> > bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
> > - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> > +
> > + rc = set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> > + if (rc)
> > + panic("Failed to decrypt swiotlb bounce buffers (%d)\n", rc);
>
> Aww, I just cleaned up all the panics! AFAICS we could warn and set
Sorry, I didn't know about that... :-)

> mem->nslabs to 0 here to semi-gracefully disable SWIOTLB (presumably
> decryption failure is sufficiently unexpected that "leaking" the SWIOTLB
> memory is the least of the system's problems). Or indeed just warn and
> do nothing as in the swiotlb_init_late() case below - what's with the
> inconsistency? In either path we have the same expectation that
> decryption succeeds (or does nothing, successfully), so failure is no
> more or less fatal to later SWIOTLB operation depending on when the
> architecture chose to set it up.

I agree it's better to print an error message rather than panic here, but
anyway the kernel will panic later, e.g. when an AMD SEV-SNP guest runs
on Hyper-V, in case this set_memory_decrypted() fails, we set
mem->nslabs to 0, and next we'll hit a panic soon when the storage
driver calls swiotlb_tbl_map_single():
"Kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and
can't now provide you with the DMA bounce buffer".

> So in 3 init paths we have 3 different outcomes from the same thing :(
>
> 1: make the whole system unusable
> 2: continue with possible data corruption (or at least weird DMA errors)
> if devices still see encrypted memory contents
> 3: cleanly disable SWIOTLB, thus also all subsequent attempts to use it
>
> (OK, for the rmem case this isn't actually 3 since falling back to
> io_tlb_default_mem might work out more like 2, but hopefully you get my
> point)
>
> Thanks,
> Robin.

How do you like this new version:
1) I removed the panic().
2) For swiotlb_update_mem_attributes() and swiotlb_init_late(), I print
an error message and disable swiotlb: the error seems so bad that IMO
we have to disable swiotlb.
3) No change to rmem_swiotlb_device_init(). The error in this function
doesn't seem fatal to me.

The bottom line is that set_memory_decrypted() should not fail silently
and cause weird issues later... BTW, normally IMO set_memory_decrypted()
doesn't fail, but I did see a failure because we're expected to retry
(the failure is fixed by
https://lwn.net/ml/linux-kernel/20221121195151.21812-3-decui%40microsoft.com/)

--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -248,12 +248,20 @@ void __init swiotlb_update_mem_attributes(void)
struct io_tlb_mem *mem = &io_tlb_default_mem;
void *vaddr;
unsigned long bytes;
+ int rc;

if (!mem->nslabs || mem->late_alloc)
return;
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
- set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+
+ rc = set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt swiotlb buffer (%d): disabling swiotlb!\n",
+ rc);
+ mem->nslabs = 0;
+ return;
+ }

mem->vaddr = swiotlb_mem_remap(mem, bytes);
if (!mem->vaddr)
@@ -391,7 +399,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned char *vstart = NULL;
- unsigned int order, area_order;
+ unsigned int order, area_order, slot_order;
bool retried = false;
int rc = 0;

@@ -442,19 +450,29 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
if (!mem->areas)
goto error_area;

+ slot_order = get_order(array_size(sizeof(*mem->slots), nslabs));
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(array_size(sizeof(*mem->slots), nslabs)));
+ slot_order);
if (!mem->slots)
goto error_slots;

- set_memory_decrypted((unsigned long)vstart,
- (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ rc = set_memory_decrypted((unsigned long)vstart,
+ (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt swiotlb buffer (%d): disabling swiotlb!\n",
+ rc);
+ mem->nslabs = 0;
+ goto error_decrypted;
+ }
+
swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
default_nareas);

swiotlb_print_info();
return 0;

+error_decrypted:
+ free_pages((unsigned long)mem->slots, slot_order);
error_slots:
free_pages((unsigned long)mem->areas, area_order);
error_area:
@@ -986,6 +1004,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,

/* Set Per-device io tlb area to one */
unsigned int nareas = 1;
+ int rc = -ENOMEM;

/*
* Since multiple devices can share the same pool, the private data,
@@ -998,21 +1017,22 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
return -ENOMEM;

mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
- if (!mem->slots) {
- kfree(mem);
- return -ENOMEM;
- }
+ if (!mem->slots)
+ goto free_mem;

mem->areas = kcalloc(nareas, sizeof(*mem->areas),
GFP_KERNEL);
- if (!mem->areas) {
- kfree(mem->slots);
- kfree(mem);
- return -ENOMEM;
+ if (!mem->areas)
+ goto free_slots;
+
+ rc = set_memory_decrypted(
+ (unsigned long)phys_to_virt(rmem->base),
+ rmem->size >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt rmem buffer (%d)\n", rc);
+ goto free_areas;
}

- set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
- rmem->size >> PAGE_SHIFT);
swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
false, nareas);
mem->for_alloc = true;
@@ -1025,6 +1045,14 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
dev->dma_io_tlb_mem = mem;

return 0;
+
+free_areas:
+ kfree(mem->areas);
+free_slots:
+ kfree(mem->slots);
+free_mem:
+ kfree(mem);
+ return rc;
}