There are several reports that the DMA sync shortcut broke non-coherent
devices.
dev->dma_need_sync is false after the &device allocation and if a driver
didn't call dma_set_mask*(), it will still be false even if the device
is not DMA-coherent and thus needs synchronizing. Due to historical
reasons, there are still a lot of drivers not calling it.
Invert the boolean, so that the sync will be performed by default and
the shortcut will be enabled only when calling dma_set_mask*().
Reported-by: Marek Szyprowski <[email protected]>
Closes: https://lore.kernel.org/lkml/[email protected]
Reported-by: Steven Price <[email protected]>
Closes: https://lore.kernel.org/lkml/[email protected]
Fixes: 32ba8b823252 ("dma: avoid redundant calls for sync operations")
Signed-off-by: Alexander Lobakin <[email protected]>
---
include/linux/device.h | 4 ++--
include/linux/dma-map-ops.h | 4 ++--
include/linux/dma-mapping.h | 2 +-
kernel/dma/mapping.c | 10 +++++-----
kernel/dma/swiotlb.c | 2 +-
5 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/include/linux/device.h b/include/linux/device.h
index ed95b829f05b..d4b50accff26 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -691,7 +691,7 @@ struct device_physical_location {
* and optionall (if the coherent mask is large enough) also
* for dma allocations. This flag is managed by the dma ops
* instance from ->dma_supported.
- * @dma_need_sync: The device needs performing DMA sync operations.
+ * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
@@ -805,7 +805,7 @@ struct device {
bool dma_ops_bypass : 1;
#endif
#ifdef CONFIG_DMA_NEED_SYNC
- bool dma_need_sync:1;
+ bool dma_skip_sync:1;
#endif
};
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 4893cb89cb52..5217b922d29f 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -280,8 +280,8 @@ static inline void dma_reset_need_sync(struct device *dev)
{
#ifdef CONFIG_DMA_NEED_SYNC
/* Reset it only once so that the function can be called on hotpath */
- if (unlikely(!dev->dma_need_sync))
- dev->dma_need_sync = true;
+ if (unlikely(dev->dma_skip_sync))
+ dev->dma_skip_sync = false;
#endif
}
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index eb4e15893b6c..f693aafe221f 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -295,7 +295,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
static inline bool dma_dev_need_sync(const struct device *dev)
{
/* Always call DMA sync operations when debugging is enabled */
- return dev->dma_need_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
+ return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
}
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 3524bc92c37f..3f77c3f8d16d 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -392,7 +392,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
if (dma_map_direct(dev, ops))
/*
- * dma_need_sync could've been reset on first SWIOTLB buffer
+ * dma_skip_sync could've been reset on first SWIOTLB buffer
* mapping, but @dma_addr is not necessary an SWIOTLB buffer.
* In this case, fall back to more granular check.
*/
@@ -407,20 +407,20 @@ static void dma_setup_need_sync(struct device *dev)
if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC))
/*
- * dma_need_sync will be reset to %true on first SWIOTLB buffer
+ * dma_skip_sync will be reset to %false on first SWIOTLB buffer
* mapping, if any. During the device initialization, it's
* enough to check only for the DMA coherence.
*/
- dev->dma_need_sync = !dev_is_dma_coherent(dev);
+ dev->dma_skip_sync = dev_is_dma_coherent(dev);
else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu &&
!ops->sync_sg_for_device && !ops->sync_sg_for_cpu)
/*
* Synchronization is not possible when none of DMA sync ops
* is set.
*/
- dev->dma_need_sync = false;
+ dev->dma_skip_sync = true;
else
- dev->dma_need_sync = true;
+ dev->dma_skip_sync = false;
}
#else /* !CONFIG_DMA_NEED_SYNC */
static inline void dma_setup_need_sync(struct device *dev) { }
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index ae3e593eaadb..068134697cf1 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -1409,7 +1409,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
}
/*
- * If dma_need_sync wasn't set, reset it on first SWIOTLB buffer
+ * If dma_skip_sync was set, reset it on first SWIOTLB buffer
* mapping to always sync SWIOTLB buffers.
*/
dma_reset_need_sync(dev);
--
2.45.0
From: Steven Price <[email protected]>
Date: Thu, 9 May 2024 16:11:26 +0100
> On 09/05/2024 15:46, Alexander Lobakin wrote:
>> There are several reports that the DMA sync shortcut broke non-coherent
>> devices.
>> dev->dma_need_sync is false after the &device allocation and if a driver
>> didn't call dma_set_mask*(), it will still be false even if the device
>> is not DMA-coherent and thus needs synchronizing. Due to historical
>> reasons, there's still a lot of drivers not calling it.
>> Invert the boolean, so that the sync will be performed by default and
>> the shortcut will be enabled only when calling dma_set_mask*().
>>
>> Reported-by: Marek Szyprowski <[email protected]>
>> Closes: https://lore.kernel.org/lkml/[email protected]
>> Reported-by: Steven Price <[email protected]>
>> Closes: https://lore.kernel.org/lkml/[email protected]
>> Fixes: 32ba8b823252 ("dma: avoid redundant calls for sync operations")
>> Signed-off-by: Alexander Lobakin <[email protected]>
>
> Tested-by: Steven Price <[email protected]>
Thanks!
>
> Thanks for the quick fix.
>
> Note that the fixes hash (32ba8b823252) is not the one in linux-next -
> that's f406c8e4b770. If the branch is getting rebased then no problem, I
> just thought I should point that out.
Oh crap, it really should be f406. Wrong tree again >_<
Chris, would you fix it when applying or should I resend?
>
> Thanks,
> Steve
Thanks,
Olek
On 09/05/2024 15:46, Alexander Lobakin wrote:
> There are several reports that the DMA sync shortcut broke non-coherent
> devices.
> dev->dma_need_sync is false after the &device allocation and if a driver
> didn't call dma_set_mask*(), it will still be false even if the device
> is not DMA-coherent and thus needs synchronizing. Due to historical
> reasons, there's still a lot of drivers not calling it.
> Invert the boolean, so that the sync will be performed by default and
> the shortcut will be enabled only when calling dma_set_mask*().
>
> Reported-by: Marek Szyprowski <[email protected]>
> Closes: https://lore.kernel.org/lkml/[email protected]
> Reported-by: Steven Price <[email protected]>
> Closes: https://lore.kernel.org/lkml/[email protected]
> Fixes: 32ba8b823252 ("dma: avoid redundant calls for sync operations")
> Signed-off-by: Alexander Lobakin <[email protected]>
Tested-by: Steven Price <[email protected]>
Thanks for the quick fix.
Note that the fixes hash (32ba8b823252) is not the one in linux-next -
that's f406c8e4b770. If the branch is getting rebased then no problem, I
just thought I should point that out.
Thanks,
Steve
> ---
> include/linux/device.h | 4 ++--
> include/linux/dma-map-ops.h | 4 ++--
> include/linux/dma-mapping.h | 2 +-
> kernel/dma/mapping.c | 10 +++++-----
> kernel/dma/swiotlb.c | 2 +-
> 5 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ed95b829f05b..d4b50accff26 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -691,7 +691,7 @@ struct device_physical_location {
> * and optionall (if the coherent mask is large enough) also
> * for dma allocations. This flag is managed by the dma ops
> * instance from ->dma_supported.
> - * @dma_need_sync: The device needs performing DMA sync operations.
> + * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
> *
> * At the lowest level, every device in a Linux system is represented by an
> * instance of struct device. The device structure contains the information
> @@ -805,7 +805,7 @@ struct device {
> bool dma_ops_bypass : 1;
> #endif
> #ifdef CONFIG_DMA_NEED_SYNC
> - bool dma_need_sync:1;
> + bool dma_skip_sync:1;
> #endif
> };
>
> diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
> index 4893cb89cb52..5217b922d29f 100644
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -280,8 +280,8 @@ static inline void dma_reset_need_sync(struct device *dev)
> {
> #ifdef CONFIG_DMA_NEED_SYNC
> /* Reset it only once so that the function can be called on hotpath */
> - if (unlikely(!dev->dma_need_sync))
> - dev->dma_need_sync = true;
> + if (unlikely(dev->dma_skip_sync))
> + dev->dma_skip_sync = false;
> #endif
> }
>
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index eb4e15893b6c..f693aafe221f 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -295,7 +295,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> static inline bool dma_dev_need_sync(const struct device *dev)
> {
> /* Always call DMA sync operations when debugging is enabled */
> - return dev->dma_need_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
> + return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
> }
>
> static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
> index 3524bc92c37f..3f77c3f8d16d 100644
> --- a/kernel/dma/mapping.c
> +++ b/kernel/dma/mapping.c
> @@ -392,7 +392,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>
> if (dma_map_direct(dev, ops))
> /*
> - * dma_need_sync could've been reset on first SWIOTLB buffer
> + * dma_skip_sync could've been reset on first SWIOTLB buffer
> * mapping, but @dma_addr is not necessary an SWIOTLB buffer.
> * In this case, fall back to more granular check.
> */
> @@ -407,20 +407,20 @@ static void dma_setup_need_sync(struct device *dev)
>
> if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC))
> /*
> - * dma_need_sync will be reset to %true on first SWIOTLB buffer
> + * dma_skip_sync will be reset to %false on first SWIOTLB buffer
> * mapping, if any. During the device initialization, it's
> * enough to check only for the DMA coherence.
> */
> - dev->dma_need_sync = !dev_is_dma_coherent(dev);
> + dev->dma_skip_sync = dev_is_dma_coherent(dev);
> else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu &&
> !ops->sync_sg_for_device && !ops->sync_sg_for_cpu)
> /*
> * Synchronization is not possible when none of DMA sync ops
> * is set.
> */
> - dev->dma_need_sync = false;
> + dev->dma_skip_sync = true;
> else
> - dev->dma_need_sync = true;
> + dev->dma_skip_sync = false;
> }
> #else /* !CONFIG_DMA_NEED_SYNC */
> static inline void dma_setup_need_sync(struct device *dev) { }
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index ae3e593eaadb..068134697cf1 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -1409,7 +1409,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> }
>
> /*
> - * If dma_need_sync wasn't set, reset it on first SWIOTLB buffer
> + * If dma_skip_sync was set, reset it on first SWIOTLB buffer
> * mapping to always sync SWIOTLB buffers.
> */
> dma_reset_need_sync(dev);
On Thu, May 09, 2024 at 05:16:13PM +0200, Alexander Lobakin wrote:
> Oh crap, it really should be f406. Wrong tree again >_<
I've fixed this up and added the Tested-by in my local tree. I'll push
it out after a bit of testing. More reviews or tested-bys are still
welcome.
On 09.05.2024 16:46, Alexander Lobakin wrote:
> There are several reports that the DMA sync shortcut broke non-coherent
> devices.
> dev->dma_need_sync is false after the &device allocation and if a driver
> didn't call dma_set_mask*(), it will still be false even if the device
> is not DMA-coherent and thus needs synchronizing. Due to historical
> reasons, there's still a lot of drivers not calling it.
> Invert the boolean, so that the sync will be performed by default and
> the shortcut will be enabled only when calling dma_set_mask*().
>
> Reported-by: Marek Szyprowski <[email protected]>
> Closes: https://lore.kernel.org/lkml/[email protected]
> Reported-by: Steven Price <[email protected]>
> Closes: https://lore.kernel.org/lkml/[email protected]
> Fixes: 32ba8b823252 ("dma: avoid redundant calls for sync operations")
> Signed-off-by: Alexander Lobakin <[email protected]>
Tested-by: Marek Szyprowski <[email protected]>
> ---
> include/linux/device.h | 4 ++--
> include/linux/dma-map-ops.h | 4 ++--
> include/linux/dma-mapping.h | 2 +-
> kernel/dma/mapping.c | 10 +++++-----
> kernel/dma/swiotlb.c | 2 +-
> 5 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ed95b829f05b..d4b50accff26 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -691,7 +691,7 @@ struct device_physical_location {
> * and optionall (if the coherent mask is large enough) also
> * for dma allocations. This flag is managed by the dma ops
> * instance from ->dma_supported.
> - * @dma_need_sync: The device needs performing DMA sync operations.
> + * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
> *
> * At the lowest level, every device in a Linux system is represented by an
> * instance of struct device. The device structure contains the information
> @@ -805,7 +805,7 @@ struct device {
> bool dma_ops_bypass : 1;
> #endif
> #ifdef CONFIG_DMA_NEED_SYNC
> - bool dma_need_sync:1;
> + bool dma_skip_sync:1;
> #endif
> };
>
> diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
> index 4893cb89cb52..5217b922d29f 100644
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -280,8 +280,8 @@ static inline void dma_reset_need_sync(struct device *dev)
> {
> #ifdef CONFIG_DMA_NEED_SYNC
> /* Reset it only once so that the function can be called on hotpath */
> - if (unlikely(!dev->dma_need_sync))
> - dev->dma_need_sync = true;
> + if (unlikely(dev->dma_skip_sync))
> + dev->dma_skip_sync = false;
> #endif
> }
>
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index eb4e15893b6c..f693aafe221f 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -295,7 +295,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> static inline bool dma_dev_need_sync(const struct device *dev)
> {
> /* Always call DMA sync operations when debugging is enabled */
> - return dev->dma_need_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
> + return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
> }
>
> static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
> index 3524bc92c37f..3f77c3f8d16d 100644
> --- a/kernel/dma/mapping.c
> +++ b/kernel/dma/mapping.c
> @@ -392,7 +392,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>
> if (dma_map_direct(dev, ops))
> /*
> - * dma_need_sync could've been reset on first SWIOTLB buffer
> + * dma_skip_sync could've been reset on first SWIOTLB buffer
> * mapping, but @dma_addr is not necessary an SWIOTLB buffer.
> * In this case, fall back to more granular check.
> */
> @@ -407,20 +407,20 @@ static void dma_setup_need_sync(struct device *dev)
>
> if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC))
> /*
> - * dma_need_sync will be reset to %true on first SWIOTLB buffer
> + * dma_skip_sync will be reset to %false on first SWIOTLB buffer
> * mapping, if any. During the device initialization, it's
> * enough to check only for the DMA coherence.
> */
> - dev->dma_need_sync = !dev_is_dma_coherent(dev);
> + dev->dma_skip_sync = dev_is_dma_coherent(dev);
> else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu &&
> !ops->sync_sg_for_device && !ops->sync_sg_for_cpu)
> /*
> * Synchronization is not possible when none of DMA sync ops
> * is set.
> */
> - dev->dma_need_sync = false;
> + dev->dma_skip_sync = true;
> else
> - dev->dma_need_sync = true;
> + dev->dma_skip_sync = false;
> }
> #else /* !CONFIG_DMA_NEED_SYNC */
> static inline void dma_setup_need_sync(struct device *dev) { }
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index ae3e593eaadb..068134697cf1 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -1409,7 +1409,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> }
>
> /*
> - * If dma_need_sync wasn't set, reset it on first SWIOTLB buffer
> + * If dma_skip_sync was set, reset it on first SWIOTLB buffer
> * mapping to always sync SWIOTLB buffers.
> */
> dma_reset_need_sync(dev);
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On 5/9/24 16:46, Alexander Lobakin wrote:
> There are several reports that the DMA sync shortcut broke non-coherent
> devices.
> dev->dma_need_sync is false after the &device allocation and if a driver
> didn't call dma_set_mask*(), it will still be false even if the device
> is not DMA-coherent and thus needs synchronizing. Due to historical
> reasons, there's still a lot of drivers not calling it.
> Invert the boolean, so that the sync will be performed by default and
> the shortcut will be enabled only when calling dma_set_mask*().
>
> Reported-by: Marek Szyprowski <[email protected]>
> Closes: https://lore.kernel.org/lkml/[email protected]
> Reported-by: Steven Price <[email protected]>
> Closes: https://lore.kernel.org/lkml/[email protected]
> Fixes: 32ba8b823252 ("dma: avoid redundant calls for sync operations")
> Signed-off-by: Alexander Lobakin <[email protected]>
> ---
> include/linux/device.h | 4 ++--
> include/linux/dma-map-ops.h | 4 ++--
> include/linux/dma-mapping.h | 2 +-
> kernel/dma/mapping.c | 10 +++++-----
> kernel/dma/swiotlb.c | 2 +-
> 5 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ed95b829f05b..d4b50accff26 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -691,7 +691,7 @@ struct device_physical_location {
> * and optionall (if the coherent mask is large enough) also
> * for dma allocations. This flag is managed by the dma ops
> * instance from ->dma_supported.
> - * @dma_need_sync: The device needs performing DMA sync operations.
> + * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
> *
> * At the lowest level, every device in a Linux system is represented by an
> * instance of struct device. The device structure contains the information
> @@ -805,7 +805,7 @@ struct device {
> bool dma_ops_bypass : 1;
> #endif
> #ifdef CONFIG_DMA_NEED_SYNC
> - bool dma_need_sync:1;
> + bool dma_skip_sync:1;
> #endif
> };
>
Very good solution, inverting the flag,
Reviewed-by: Przemek Kitszel <[email protected]>
// ...