Hi all,
This is version three of the patches I posted recently:
v1: https://lore.kernel.org/r/[email protected]
v2: https://lore.kernel.org/r/[email protected]
Thanks to Robin for the comments on the most recent version.
Changes since v2 include:
- Restore missing 'continue' statement that got accidentally dropped
while addressing the initial round of review feedback.
- Reword the commit message in patch #1
- Add a Fixes: tag to the last patch
Cheers,
Will
Cc: [email protected]
Cc: Christoph Hellwig <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Petr Tesarik <[email protected]>
Cc: Dexuan Cui <[email protected]>
--->8
Will Deacon (3):
swiotlb: Fix double-allocation of slots due to broken alignment
handling
swiotlb: Enforce page alignment in swiotlb_alloc()
swiotlb: Honour dma_alloc_coherent() alignment in swiotlb_alloc()
kernel/dma/swiotlb.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
--
2.43.0.594.gd9cf4e227d-goog
Commit bbb73a103fbb ("swiotlb: fix a braino in the alignment check fix"),
which was a fix for commit 0eee5ae10256 ("swiotlb: fix slot alignment
checks"), causes a functional regression with vsock in a virtual machine
using bouncing via a restricted DMA SWIOTLB pool.
When virtio allocates the virtqueues for the vsock device using
dma_alloc_coherent(), the SWIOTLB search can return page-unaligned
allocations if 'area->index' was left unaligned by a previous allocation
from the buffer:
# Final address in brackets is the SWIOTLB address returned to the caller
| virtio-pci 0000:00:07.0: orig_addr 0x0 alloc_size 0x2000, iotlb_align_mask 0x800 stride 0x2: got slot 1645-1649/7168 (0x98326800)
| virtio-pci 0000:00:07.0: orig_addr 0x0 alloc_size 0x2000, iotlb_align_mask 0x800 stride 0x2: got slot 1649-1653/7168 (0x98328800)
| virtio-pci 0000:00:07.0: orig_addr 0x0 alloc_size 0x2000, iotlb_align_mask 0x800 stride 0x2: got slot 1653-1657/7168 (0x9832a800)
This ends badly (typically buffer corruption and/or a hang) because
swiotlb_alloc() is expecting a page-aligned allocation and so blindly
returns a pointer to the 'struct page' corresponding to the allocation,
therefore double-allocating the first half (2KiB slot) of the 4KiB page.
Fix the problem by treating the allocation alignment separately to any
additional alignment requirements from the device, using the maximum
of the two as the stride to search the buffer slots and taking care
to ensure a minimum of page-alignment for buffers larger than a page.
Fixes: bbb73a103fbb ("swiotlb: fix a braino in the alignment check fix")
Fixes: 0eee5ae10256 ("swiotlb: fix slot alignment checks")
Cc: Christoph Hellwig <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Petr Tesarik <[email protected]>
Cc: Dexuan Cui <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
---
kernel/dma/swiotlb.c | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index b079a9a8e087..9ff909a0039a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -982,7 +982,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
phys_to_dma_unencrypted(dev, pool->start) & boundary_mask;
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask =
- dma_get_min_align_mask(dev) | alloc_align_mask;
+ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
unsigned int nslots = nr_slots(alloc_size), stride;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
unsigned int index, slots_checked, count = 0, i;
@@ -993,19 +993,18 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
BUG_ON(!nslots);
BUG_ON(area_index >= pool->nareas);
+ /*
+ * For mappings with an alignment requirement don't bother looping to
+ * unaligned slots once we found an aligned one.
+ */
+ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask));
+
/*
* For allocations of PAGE_SIZE or larger only look for page aligned
* allocations.
*/
if (alloc_size >= PAGE_SIZE)
- iotlb_align_mask |= ~PAGE_MASK;
- iotlb_align_mask &= ~(IO_TLB_SIZE - 1);
-
- /*
- * For mappings with an alignment requirement don't bother looping to
- * unaligned slots once we found an aligned one.
- */
- stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+ stride = max(stride, PAGE_SHIFT - IO_TLB_SHIFT + 1);
spin_lock_irqsave(&area->lock, flags);
if (unlikely(nslots > pool->area_nslabs - area->used))
@@ -1015,11 +1014,14 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
index = area->index;
for (slots_checked = 0; slots_checked < pool->area_nslabs; ) {
- slot_index = slot_base + index;
+ phys_addr_t tlb_addr;
- if (orig_addr &&
- (slot_addr(tbl_dma_addr, slot_index) &
- iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
+ slot_index = slot_base + index;
+ tlb_addr = slot_addr(tbl_dma_addr, slot_index);
+
+ if ((tlb_addr & alloc_align_mask) ||
+ (orig_addr && (tlb_addr & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask))) {
index = wrap_area_index(pool, index + 1);
slots_checked++;
continue;
--
2.43.0.594.gd9cf4e227d-goog
When allocating pages from a restricted DMA pool in swiotlb_alloc(),
the buffer address is blindly converted to a 'struct page *' that is
returned to the caller. In the unlikely event of an allocation bug,
page-unaligned addresses are not detected and slots can silently be
double-allocated.
Add a simple check of the buffer alignment in swiotlb_alloc() to make
debugging a little easier if something has gone wonky.
Cc: Christoph Hellwig <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Petr Tesarik <[email protected]>
Cc: Dexuan Cui <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
---
kernel/dma/swiotlb.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9ff909a0039a..adbb3143238b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -1643,6 +1643,12 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
return NULL;
tlb_addr = slot_addr(pool->start, index);
+ if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
+ dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
+ &tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr);
+ return NULL;
+ }
return pfn_to_page(PFN_DOWN(tlb_addr));
}
--
2.43.0.594.gd9cf4e227d-goog
Robin and Petr, does this looks good to you now?
On Mon, Feb 05, 2024 at 07:01:24PM +0000, Will Deacon wrote:
> Hi all,
>
> This is version three of the patches I posted recently:
>
> v1: https://lore.kernel.org/r/[email protected]
> v2: https://lore.kernel.org/r/[email protected]
>
> Thanks to Robin for the comments on the most recent version.
>
> Changes since v2 include:
>
> - Restore missing 'continue' statement that got accidentally dropped
> while addressing the initial round of review feedback.
>
> - Reword the commit message in patch #1
>
> - Add a Fixes: tag to the last patch
>
> Cheers,
>
> Will
>
> Cc: [email protected]
> Cc: Christoph Hellwig <[email protected]>
> Cc: Marek Szyprowski <[email protected]>
> Cc: Robin Murphy <[email protected]>
> Cc: Petr Tesarik <[email protected]>
> Cc: Dexuan Cui <[email protected]>
>
> --->8
>
> Will Deacon (3):
> swiotlb: Fix double-allocation of slots due to broken alignment
> handling
> swiotlb: Enforce page alignment in swiotlb_alloc()
> swiotlb: Honour dma_alloc_coherent() alignment in swiotlb_alloc()
>
> kernel/dma/swiotlb.c | 38 ++++++++++++++++++++++++--------------
> 1 file changed, 24 insertions(+), 14 deletions(-)
>
> --
> 2.43.0.594.gd9cf4e227d-goog
---end quoted text---
Hey Christoph,
On Mon, Feb 19, 2024 at 07:35:27AM +0100, Christoph Hellwig wrote:
> Robin and Petr, does this looks good to you now?
FWIW, I'm likely to send a v4 addressing another issue reported by
Nicolin with NVME and 64k pages [1], so you may as well wait for that.
Cheers,
Will
[1] https://lkml.kernel.org/r/[email protected]
On Mon, 19 Feb 2024 09:24:39 +0000
Will Deacon <[email protected]> wrote:
> Hey Christoph,
>
> On Mon, Feb 19, 2024 at 07:35:27AM +0100, Christoph Hellwig wrote:
> > Robin and Petr, does this looks good to you now?
It looks good to me for the boot-time swiotlb pool. Dynamic allocation
of transient swiotlb pools does not take these additional alignment
constraints into account, so when allocation may fail. OTOH the
underlying allocator(s) do not provide a suitable API, so I don't think
it's worth fixing.
In the worst case, a DMA buffer will fail to map, which may already
happen today.
> FWIW, I'm likely to send a v4 addressing another issue reported by
> Nicolin with NVME and 64k pages [1], so you may as well wait for that.
I'm interested. The code look quite OK to me as it is, but I assume it
again uncovers something when the difference between PAGE_SHIFT and
IOTLB_SHIFT is more than one (which was motivation for my initial fix,
which in the end broke more than it fixed).
Petr T
On Mon, 5 Feb 2024 19:01:26 +0000
Will Deacon <[email protected]> wrote:
> When allocating pages from a restricted DMA pool in swiotlb_alloc(),
> the buffer address is blindly converted to a 'struct page *' that is
> returned to the caller. In the unlikely event of an allocation bug,
> page-unaligned addresses are not detected and slots can silently be
> double-allocated.
>
> Add a simple check of the buffer alignment in swiotlb_alloc() to make
> debugging a little easier if something has gone wonky.
>
> Cc: Christoph Hellwig <[email protected]>
> Cc: Marek Szyprowski <[email protected]>
> Cc: Robin Murphy <[email protected]>
> Cc: Petr Tesarik <[email protected]>
> Cc: Dexuan Cui <[email protected]>
> Signed-off-by: Will Deacon <[email protected]>
Reviewed-by: Petr Tesarik <[email protected]>
Petr T
> ---
> kernel/dma/swiotlb.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 9ff909a0039a..adbb3143238b 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -1643,6 +1643,12 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
> return NULL;
>
> tlb_addr = slot_addr(pool->start, index);
> + if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
> + dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
> + &tlb_addr);
> + swiotlb_release_slots(dev, tlb_addr);
> + return NULL;
> + }
>
> return pfn_to_page(PFN_DOWN(tlb_addr));
> }
On Mon, 5 Feb 2024 19:01:25 +0000
Will Deacon <[email protected]> wrote:
> Commit bbb73a103fbb ("swiotlb: fix a braino in the alignment check fix"),
> which was a fix for commit 0eee5ae10256 ("swiotlb: fix slot alignment
> checks"), causes a functional regression with vsock in a virtual machine
> using bouncing via a restricted DMA SWIOTLB pool.
>
> When virtio allocates the virtqueues for the vsock device using
> dma_alloc_coherent(), the SWIOTLB search can return page-unaligned
> allocations if 'area->index' was left unaligned by a previous allocation
> from the buffer:
>
> # Final address in brackets is the SWIOTLB address returned to the caller
> | virtio-pci 0000:00:07.0: orig_addr 0x0 alloc_size 0x2000, iotlb_align_mask 0x800 stride 0x2: got slot 1645-1649/7168 (0x98326800)
> | virtio-pci 0000:00:07.0: orig_addr 0x0 alloc_size 0x2000, iotlb_align_mask 0x800 stride 0x2: got slot 1649-1653/7168 (0x98328800)
> | virtio-pci 0000:00:07.0: orig_addr 0x0 alloc_size 0x2000, iotlb_align_mask 0x800 stride 0x2: got slot 1653-1657/7168 (0x9832a800)
>
> This ends badly (typically buffer corruption and/or a hang) because
> swiotlb_alloc() is expecting a page-aligned allocation and so blindly
> returns a pointer to the 'struct page' corresponding to the allocation,
> therefore double-allocating the first half (2KiB slot) of the 4KiB page.
>
> Fix the problem by treating the allocation alignment separately to any
> additional alignment requirements from the device, using the maximum
> of the two as the stride to search the buffer slots and taking care
> to ensure a minimum of page-alignment for buffers larger than a page.
>
> Fixes: bbb73a103fbb ("swiotlb: fix a braino in the alignment check fix")
> Fixes: 0eee5ae10256 ("swiotlb: fix slot alignment checks")
> Cc: Christoph Hellwig <[email protected]>
> Cc: Marek Szyprowski <[email protected]>
> Cc: Robin Murphy <[email protected]>
> Cc: Petr Tesarik <[email protected]>
> Cc: Dexuan Cui <[email protected]>
> Signed-off-by: Will Deacon <[email protected]>
Reviewed-by: Petr Tesarik <[email protected]>
Petr T
> ---
> kernel/dma/swiotlb.c | 28 +++++++++++++++-------------
> 1 file changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index b079a9a8e087..9ff909a0039a 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -982,7 +982,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
> phys_to_dma_unencrypted(dev, pool->start) & boundary_mask;
> unsigned long max_slots = get_max_slots(boundary_mask);
> unsigned int iotlb_align_mask =
> - dma_get_min_align_mask(dev) | alloc_align_mask;
> + dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
> unsigned int nslots = nr_slots(alloc_size), stride;
> unsigned int offset = swiotlb_align_offset(dev, orig_addr);
> unsigned int index, slots_checked, count = 0, i;
> @@ -993,19 +993,18 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
> BUG_ON(!nslots);
> BUG_ON(area_index >= pool->nareas);
>
> + /*
> + * For mappings with an alignment requirement don't bother looping to
> + * unaligned slots once we found an aligned one.
> + */
> + stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask));
> +
> /*
> * For allocations of PAGE_SIZE or larger only look for page aligned
> * allocations.
> */
> if (alloc_size >= PAGE_SIZE)
> - iotlb_align_mask |= ~PAGE_MASK;
> - iotlb_align_mask &= ~(IO_TLB_SIZE - 1);
> -
> - /*
> - * For mappings with an alignment requirement don't bother looping to
> - * unaligned slots once we found an aligned one.
> - */
> - stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
> + stride = max(stride, PAGE_SHIFT - IO_TLB_SHIFT + 1);
>
> spin_lock_irqsave(&area->lock, flags);
> if (unlikely(nslots > pool->area_nslabs - area->used))
> @@ -1015,11 +1014,14 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
> index = area->index;
>
> for (slots_checked = 0; slots_checked < pool->area_nslabs; ) {
> - slot_index = slot_base + index;
> + phys_addr_t tlb_addr;
>
> - if (orig_addr &&
> - (slot_addr(tbl_dma_addr, slot_index) &
> - iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
> + slot_index = slot_base + index;
> + tlb_addr = slot_addr(tbl_dma_addr, slot_index);
> +
> + if ((tlb_addr & alloc_align_mask) ||
> + (orig_addr && (tlb_addr & iotlb_align_mask) !=
> + (orig_addr & iotlb_align_mask))) {
> index = wrap_area_index(pool, index + 1);
> slots_checked++;
> continue;