2024-03-18 13:07:45

by Petr Tesarik

Subject: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

From: Petr Tesarik <[email protected]>

Allow a buffer pre-padding of up to alloc_align_mask. If the allocation
alignment is bigger than IO_TLB_SIZE and min_align_mask covers any non-zero
bits in the original address between IO_TLB_SIZE and alloc_align_mask,
these bits are not preserved in the swiotlb buffer address.

To fix this case, increase the allocation size and use a larger offset
within the allocated buffer. As a result, extra padding slots may be
allocated before the mapping start address.
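
As an illustration, consider these example values (made up for this commit
message, not taken from any particular device):

  orig_addr        = 0x12345a37
  min_align_mask   = 0x0fff           /* device keeps the low 12 bits */
  alloc_align_mask = 0x3fff           /* 16 KiB allocation alignment */
  IO_TLB_SIZE      = 2048             /* 1 << IO_TLB_SHIFT */

  offset    = orig_addr & min_align_mask & (alloc_align_mask | (IO_TLB_SIZE - 1))
            = 0xa37                   /* 2615 bytes, i.e. more than one slot */
  pad_slots = offset / IO_TLB_SIZE    /* 1 padding slot */
  offset    = offset % IO_TLB_SIZE    /* 0x237 within the mapped slot */

One padding slot is allocated in front of the slot that backs the mapping,
and the returned tlb_addr preserves the bits of orig_addr covered by
min_align_mask (0xa37).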

Set the orig_addr in these padding slots to INVALID_PHYS_ADDR, because they
do not correspond to any CPU buffer and the data must never be synced.

The padding slots should be automatically released when the buffer is
unmapped. However, swiotlb_tbl_unmap_single() takes only the address of the
DMA buffer slot, not the first padding slot. Save the number of padding
slots in struct io_tlb_slot and use it to adjust the slot index in
swiotlb_release_slots(), so all allocated slots are properly freed.
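
Concretely, the unmap path recovers the first allocated slot like this
(excerpted from the hunk below):

  index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
  index -= mem->slots[index].pad_slots;
  nslots = nr_slots(mem->slots[index].alloc_size + offset);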

Fixes: 2fd4fa5d3fb5 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present")
Link: https://lore.kernel.org/linux-iommu/[email protected]/
Signed-off-by: Petr Tesarik <[email protected]>
---
kernel/dma/swiotlb.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 86fe172b5958..aefb05ff55e7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,11 +69,14 @@
* @alloc_size: Size of the allocated buffer.
* @list: The free list describing the number of free entries available
* from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
*/
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
- unsigned int list;
+ unsigned short list;
+ unsigned short pad_slots;
};

static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
mem->nslabs - i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}

memset(vaddr, 0, bytes);
@@ -1328,11 +1332,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset;
struct io_tlb_pool *pool;
unsigned int i;
int index;
phys_addr_t tlb_addr;
+ unsigned short pad_slots;

if (!mem || !mem->nslabs) {
dev_warn_ratelimited(dev,
@@ -1349,6 +1354,15 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}

+ /*
+ * Calculate buffer pre-padding within the allocated space. Use it to
+ * preserve the low bits of the original address according to device's
+ * min_align_mask. Limit the padding to alloc_align_mask or slot size
+ * (whichever is bigger); higher bits of the original address are
+ * preserved by selecting a suitable IO TLB slot.
+ */
+ offset = orig_addr & dma_get_min_align_mask(dev) &
+ (alloc_align_mask | (IO_TLB_SIZE - 1));
index = swiotlb_find_slots(dev, orig_addr,
alloc_size + offset, alloc_align_mask, &pool);
if (index == -1) {
@@ -1364,6 +1378,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
+ pad_slots = offset / IO_TLB_SIZE;
+ offset %= IO_TLB_SIZE;
+ index += pad_slots;
+ pool->slots[index].pad_slots = i;
for (i = 0; i < nr_slots(alloc_size + offset); i++)
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1385,12 +1403,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
- int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int aindex = index / mem->area_nslabs;
- struct io_tlb_area *area = &mem->areas[aindex];
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
int count, i;

+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
+
/*
* Return the buffer to the free list by setting the corresponding
* entries to indicate the number of contiguous entries available.
@@ -1413,6 +1435,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}

/*
--
2.34.1



2024-03-18 15:37:33

by Michael Kelley

Subject: RE: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

From: Petr Tesarik <[email protected]> Sent: Monday, March 18, 2024 6:05 AM
>
> Allow a buffer pre-padding of up to alloc_align_mask. If the allocation
> alignment is bigger than IO_TLB_SIZE and min_align_mask covers any non-zero
> bits in the original address between IO_TLB_SIZE and alloc_align_mask,
> these bits are not preserved in the swiotlb buffer address.
>
> To fix this case, increase the allocation size and use a larger offset
> within the allocated buffer. As a result, extra padding slots may be
> allocated before the mapping start address.
>
> Set the orig_addr in these padding slots to INVALID_PHYS_ADDR, because they
> do not correspond to any CPU buffer and the data must never be synced.

This paragraph is now obsolete.

>
> The padding slots should be automatically released when the buffer is
> unmapped. However, swiotlb_tbl_unmap_single() takes only the address of the
> DMA buffer slot, not the first padding slot. Save the number of padding
> slots in struct io_tlb_slot and use it to adjust the slot index in
> swiotlb_release_slots(), so all allocated slots are properly freed.
>
> Fixes: 2fd4fa5d3fb5 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present")
> Link: https://lore.kernel.org/linux-iommu/[email protected]/
> Signed-off-by: Petr Tesarik <[email protected]>
> ---
> kernel/dma/swiotlb.c | 35 +++++++++++++++++++++++++++++------
> 1 file changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 86fe172b5958..aefb05ff55e7 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -69,11 +69,14 @@
> * @alloc_size: Size of the allocated buffer.
> * @list: The free list describing the number of free entries available
> * from each index.
> + * @pad_slots: Number of preceding padding slots. Valid only in the first
> + * allocated non-padding slot.
> */
> struct io_tlb_slot {
> phys_addr_t orig_addr;
> size_t alloc_size;
> - unsigned int list;
> + unsigned short list;
> + unsigned short pad_slots;
> };
>
> static bool swiotlb_force_bounce;
> @@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
> mem->nslabs - i);
> mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> mem->slots[i].alloc_size = 0;
> + mem->slots[i].pad_slots = 0;
> }
>
> memset(vaddr, 0, bytes);
> @@ -1328,11 +1332,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> unsigned long attrs)
> {
> struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> - unsigned int offset = swiotlb_align_offset(dev, orig_addr);
> + unsigned int offset;
> struct io_tlb_pool *pool;
> unsigned int i;
> int index;
> phys_addr_t tlb_addr;
> + unsigned short pad_slots;
>
> if (!mem || !mem->nslabs) {
> dev_warn_ratelimited(dev,
> @@ -1349,6 +1354,15 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> return (phys_addr_t)DMA_MAPPING_ERROR;
> }
>
> + /*
> + * Calculate buffer pre-padding within the allocated space. Use it to
> + * preserve the low bits of the original address according to device's
> + * min_align_mask. Limit the padding to alloc_align_mask or slot size
> + * (whichever is bigger); higher bits of the original address are
> + * preserved by selecting a suitable IO TLB slot.
> + */
> + offset = orig_addr & dma_get_min_align_mask(dev) &
> + (alloc_align_mask | (IO_TLB_SIZE - 1));
> index = swiotlb_find_slots(dev, orig_addr,
> alloc_size + offset, alloc_align_mask, &pool);
> if (index == -1) {
> @@ -1364,6 +1378,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> * This is needed when we sync the memory. Then we sync the buffer if
> * needed.
> */
> + pad_slots = offset / IO_TLB_SIZE;
> + offset %= IO_TLB_SIZE;
> + index += pad_slots;
> + pool->slots[index].pad_slots = i;

The above line should be:
pool->slots[index].pad_slots = pad_slots;

> for (i = 0; i < nr_slots(alloc_size + offset); i++)
> pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
> tlb_addr = slot_addr(pool->start, index) + offset;
> @@ -1385,12 +1403,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
> unsigned long flags;
> unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
> - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> - int nslots = nr_slots(mem->slots[index].alloc_size + offset);
> - int aindex = index / mem->area_nslabs;
> - struct io_tlb_area *area = &mem->areas[aindex];
> + int index, nslots, aindex;
> + struct io_tlb_area *area;
> int count, i;
>
> + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> + index -= mem->slots[index].pad_slots;
> + nslots = nr_slots(mem->slots[index].alloc_size + offset);
> + aindex = index / mem->area_nslabs;
> + area = &mem->areas[aindex];
> +
> /*
> * Return the buffer to the free list by setting the corresponding
> * entries to indicate the number of contiguous entries available.
> @@ -1413,6 +1435,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> mem->slots[i].list = ++count;
> mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> mem->slots[i].alloc_size = 0;
> + mem->slots[i].pad_slots = 0;
> }
>
> /*
> --
> 2.34.1


2024-03-18 18:50:58

by Petr Tesařík

Subject: Re: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

On Mon, 18 Mar 2024 15:37:16 +0000
Michael Kelley <[email protected]> wrote:

> From: Petr Tesarik <[email protected]> Sent: Monday, March 18, 2024 6:05 AM
> >
> > Allow a buffer pre-padding of up to alloc_align_mask. If the allocation
> > alignment is bigger than IO_TLB_SIZE and min_align_mask covers any non-zero
> > bits in the original address between IO_TLB_SIZE and alloc_align_mask,
> > these bits are not preserved in the swiotlb buffer address.
> >
> > To fix this case, increase the allocation size and use a larger offset
> > within the allocated buffer. As a result, extra padding slots may be
> > allocated before the mapping start address.
> >
> > Set the orig_addr in these padding slots to INVALID_PHYS_ADDR, because they
> > do not correspond to any CPU buffer and the data must never be synced.
>
> This paragraph is now obsolete.

Right. It should now say that orig_addr already _is_ set to
INVALID_PHYS_ADDR, so attempts to sync data will be ignored.
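
Roughly, the guard in the sync path looks like this (paraphrased sketch,
not the exact upstream code):

	phys_addr_t orig_addr = mem->slots[index].orig_addr;

	if (orig_addr == INVALID_PHYS_ADDR)
		return;		/* padding slot: nothing to sync */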

> >
> > The padding slots should be automatically released when the buffer is
> > unmapped. However, swiotlb_tbl_unmap_single() takes only the address of the
> > DMA buffer slot, not the first padding slot. Save the number of padding
> > slots in struct io_tlb_slot and use it to adjust the slot index in
> > swiotlb_release_slots(), so all allocated slots are properly freed.
> >
> > Fixes: 2fd4fa5d3fb5 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present")
> > Link: https://lore.kernel.org/linux-iommu/[email protected]/
> > Signed-off-by: Petr Tesarik <[email protected]>
> > ---
> > kernel/dma/swiotlb.c | 35 +++++++++++++++++++++++++++++------
> > 1 file changed, 29 insertions(+), 6 deletions(-)
> >
> > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > index 86fe172b5958..aefb05ff55e7 100644
> > --- a/kernel/dma/swiotlb.c
> > +++ b/kernel/dma/swiotlb.c
> > @@ -69,11 +69,14 @@
> > * @alloc_size: Size of the allocated buffer.
> > * @list: The free list describing the number of free entries available
> > * from each index.
> > + * @pad_slots: Number of preceding padding slots. Valid only in the first
> > + * allocated non-padding slot.
> > */
> > struct io_tlb_slot {
> > phys_addr_t orig_addr;
> > size_t alloc_size;
> > - unsigned int list;
> > + unsigned short list;
> > + unsigned short pad_slots;
> > };
> >
> > static bool swiotlb_force_bounce;
> > @@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
> > mem->nslabs - i);
> > mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> > mem->slots[i].alloc_size = 0;
> > + mem->slots[i].pad_slots = 0;
> > }
> >
> > memset(vaddr, 0, bytes);
> > @@ -1328,11 +1332,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> > unsigned long attrs)
> > {
> > struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> > - unsigned int offset = swiotlb_align_offset(dev, orig_addr);
> > + unsigned int offset;
> > struct io_tlb_pool *pool;
> > unsigned int i;
> > int index;
> > phys_addr_t tlb_addr;
> > + unsigned short pad_slots;
> >
> > if (!mem || !mem->nslabs) {
> > dev_warn_ratelimited(dev,
> > @@ -1349,6 +1354,15 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> > return (phys_addr_t)DMA_MAPPING_ERROR;
> > }
> >
> > + /*
> > + * Calculate buffer pre-padding within the allocated space. Use it to
> > + * preserve the low bits of the original address according to device's
> > + * min_align_mask. Limit the padding to alloc_align_mask or slot size
> > + * (whichever is bigger); higher bits of the original address are
> > + * preserved by selecting a suitable IO TLB slot.
> > + */
> > + offset = orig_addr & dma_get_min_align_mask(dev) &
> > + (alloc_align_mask | (IO_TLB_SIZE - 1));
> > index = swiotlb_find_slots(dev, orig_addr,
> > alloc_size + offset, alloc_align_mask, &pool);
> > if (index == -1) {
> > @@ -1364,6 +1378,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> > * This is needed when we sync the memory. Then we sync the buffer if
> > * needed.
> > */
> > + pad_slots = offset / IO_TLB_SIZE;
> > + offset %= IO_TLB_SIZE;
> > + index += pad_slots;
> > + pool->slots[index].pad_slots = i;
>
> The above line should be:
> pool->slots[index].pad_slots = pad_slots;

Doh. Yes, I rewrote it a few times and then forgot to change this.

How did it even pass the test suite?

Thank you for the review.

Petr T

> > for (i = 0; i < nr_slots(alloc_size + offset); i++)
> > pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
> > tlb_addr = slot_addr(pool->start, index) + offset;
> > @@ -1385,12 +1403,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> > struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
> > unsigned long flags;
> > unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
> > - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> > - int nslots = nr_slots(mem->slots[index].alloc_size + offset);
> > - int aindex = index / mem->area_nslabs;
> > - struct io_tlb_area *area = &mem->areas[aindex];
> > + int index, nslots, aindex;
> > + struct io_tlb_area *area;
> > int count, i;
> >
> > + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> > + index -= mem->slots[index].pad_slots;
> > + nslots = nr_slots(mem->slots[index].alloc_size + offset);
> > + aindex = index / mem->area_nslabs;
> > + area = &mem->areas[aindex];
> > +
> > /*
> > * Return the buffer to the free list by setting the corresponding
> > * entries to indicate the number of contiguous entries available.
> > @@ -1413,6 +1435,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> > mem->slots[i].list = ++count;
> > mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> > mem->slots[i].alloc_size = 0;
> > + mem->slots[i].pad_slots = 0;
> > }
> >
> > /*
> > --
> > 2.34.1
>


2024-03-19 04:46:38

by kernel test robot

Subject: Re: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

Hi Petr,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.8]
[also build test WARNING on linus/master next-20240318]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Petr-Tesarik/swiotlb-extend-buffer-pre-padding-to-alloc_align_mask-if-necessary/20240318-212500
base: v6.8
patch link: https://lore.kernel.org/r/20240318130447.594-2-petrtesarik%40huaweicloud.com
patch subject: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20240319/[email protected]/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 8f68022f8e6e54d1aeae4ed301f5a015963089b7)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240319/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

In file included from kernel/dma/swiotlb.c:27:
In file included from include/linux/dma-direct.h:9:
In file included from include/linux/dma-mapping.h:8:
In file included from include/linux/device.h:32:
In file included from include/linux/device/driver.h:21:
In file included from include/linux/module.h:19:
In file included from include/linux/elf.h:6:
In file included from arch/s390/include/asm/elf.h:173:
In file included from arch/s390/include/asm/mmu_context.h:11:
In file included from arch/s390/include/asm/pgalloc.h:18:
In file included from include/linux/mm.h:2188:
include/linux/vmstat.h:522:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
522 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
In file included from kernel/dma/swiotlb.c:27:
In file included from include/linux/dma-direct.h:9:
In file included from include/linux/dma-mapping.h:11:
In file included from include/linux/scatterlist.h:9:
In file included from arch/s390/include/asm/io.h:78:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
547 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
560 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
| ^
include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
| ^
In file included from kernel/dma/swiotlb.c:27:
In file included from include/linux/dma-direct.h:9:
In file included from include/linux/dma-mapping.h:11:
In file included from include/linux/scatterlist.h:9:
In file included from arch/s390/include/asm/io.h:78:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
573 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
| ^
include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
| ^
In file included from kernel/dma/swiotlb.c:27:
In file included from include/linux/dma-direct.h:9:
In file included from include/linux/dma-mapping.h:11:
In file included from include/linux/scatterlist.h:9:
In file included from arch/s390/include/asm/io.h:78:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
584 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
594 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
604 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
692 | readsb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
700 | readsw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
708 | readsl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
717 | writesb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
726 | writesw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
735 | writesl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
>> kernel/dma/swiotlb.c:1348:33: warning: variable 'i' is uninitialized when used here [-Wuninitialized]
1348 | pool->slots[index].pad_slots = i;
| ^
kernel/dma/swiotlb.c:1301:16: note: initialize the variable 'i' to silence this warning
1301 | unsigned int i;
| ^
| = 0
kernel/dma/swiotlb.c:1643:20: warning: unused function 'swiotlb_create_debugfs_files' [-Wunused-function]
1643 | static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
15 warnings generated.


vim +/i +1348 kernel/dma/swiotlb.c

1292
1293 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
1294 size_t mapping_size, size_t alloc_size,
1295 unsigned int alloc_align_mask, enum dma_data_direction dir,
1296 unsigned long attrs)
1297 {
1298 struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
1299 unsigned int offset;
1300 struct io_tlb_pool *pool;
1301 unsigned int i;
1302 int index;
1303 phys_addr_t tlb_addr;
1304 unsigned short pad_slots;
1305
1306 if (!mem || !mem->nslabs) {
1307 dev_warn_ratelimited(dev,
1308 "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
1309 return (phys_addr_t)DMA_MAPPING_ERROR;
1310 }
1311
1312 if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
1313 pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
1314
1315 if (mapping_size > alloc_size) {
1316 dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
1317 mapping_size, alloc_size);
1318 return (phys_addr_t)DMA_MAPPING_ERROR;
1319 }
1320
1321 /*
1322 * Calculate buffer pre-padding within the allocated space. Use it to
1323 * preserve the low bits of the original address according to device's
1324 * min_align_mask. Limit the padding to alloc_align_mask or slot size
1325 * (whichever is bigger); higher bits of the original address are
1326 * preserved by selecting a suitable IO TLB slot.
1327 */
1328 offset = orig_addr & dma_get_min_align_mask(dev) &
1329 (alloc_align_mask | (IO_TLB_SIZE - 1));
1330 index = swiotlb_find_slots(dev, orig_addr,
1331 alloc_size + offset, alloc_align_mask, &pool);
1332 if (index == -1) {
1333 if (!(attrs & DMA_ATTR_NO_WARN))
1334 dev_warn_ratelimited(dev,
1335 "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
1336 alloc_size, mem->nslabs, mem_used(mem));
1337 return (phys_addr_t)DMA_MAPPING_ERROR;
1338 }
1339
1340 /*
1341 * Save away the mapping from the original address to the DMA address.
1342 * This is needed when we sync the memory. Then we sync the buffer if
1343 * needed.
1344 */
1345 pad_slots = offset / IO_TLB_SIZE;
1346 offset %= IO_TLB_SIZE;
1347 index += pad_slots;
> 1348 pool->slots[index].pad_slots = i;
1349 for (i = 0; i < nr_slots(alloc_size + offset); i++)
1350 pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
1351 tlb_addr = slot_addr(pool->start, index) + offset;
1352 /*
1353 * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy
1354 * the original buffer to the TLB buffer before initiating DMA in order
1355 * to preserve the original's data if the device does a partial write,
1356 * i.e. if the device doesn't overwrite the entire buffer. Preserving
1357 * the original data, even if it's garbage, is necessary to match
1358 * hardware behavior. Use of swiotlb is supposed to be transparent,
1359 * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
1360 */
1361 swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
1362 return tlb_addr;
1363 }
1364

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki