2024-02-14 16:23:30

by Alexander Lobakin

Subject: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

Some platforms do have DMA, but DMA there is always direct and coherent.
Currently, even on such platforms DMA sync operations are compiled and
called.
Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
either sync operations are needed, or DMA ops or swiotlb are enabled.
Define dma_need_sync() and dma_skip_sync() depending on this symbol's
state and don't call the sync ops when dma_skip_sync() is true.
The change allows for future optimizations of DMA sync calls depending
on compile-time or runtime conditions.

Signed-off-by: Alexander Lobakin <[email protected]>
---
kernel/dma/Kconfig | 4 ++
include/linux/dma-mapping.h | 80 +++++++++++++++++++++++++++++++------
kernel/dma/mapping.c | 20 +++++-----
3 files changed, 81 insertions(+), 23 deletions(-)

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index d62f5957f36b..1c9ff05b1ecb 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
bool
depends on SWIOTLB

+config DMA_NEED_SYNC
+ def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
+ ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
+
config DMA_RESTRICTED_POOL
bool "DMA Restricted Pool"
depends on OF && OF_RESERVED_MEM && SWIOTLB
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4a658de44ee9..6c7640441214 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -117,13 +117,13 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs);
void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs);
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir);
-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir);
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, unsigned long attrs);
@@ -147,7 +147,7 @@ u64 dma_get_required_mask(struct device *dev);
bool dma_addressing_limited(struct device *dev);
size_t dma_max_mapping_size(struct device *dev);
size_t dma_opt_mapping_size(struct device *dev);
-bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
+bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
unsigned long dma_get_merge_boundary(struct device *dev);
struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
@@ -195,19 +195,19 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
}
-static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
+static inline void __dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
}
-static inline void dma_sync_single_for_device(struct device *dev,
+static inline void __dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
}
-static inline void dma_sync_sg_for_cpu(struct device *dev,
+static inline void __dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
}
-static inline void dma_sync_sg_for_device(struct device *dev,
+static inline void __dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
}
@@ -277,7 +277,7 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
{
return 0;
}
-static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+static inline bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return false;
}
@@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
}

+static inline void __dma_sync_single_range_for_cpu(struct device *dev,
+ dma_addr_t addr, unsigned long offset, size_t size,
+ enum dma_data_direction dir)
+{
+ __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+}
+
+static inline void __dma_sync_single_range_for_device(struct device *dev,
+ dma_addr_t addr, unsigned long offset, size_t size,
+ enum dma_data_direction dir)
+{
+ __dma_sync_single_for_device(dev, addr + offset, size, dir);
+}
+
+static inline bool dma_skip_sync(const struct device *dev)
+{
+ return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
+}
+
+static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+{
+ return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
+}
+
+static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ if (!dma_skip_sync(dev))
+ __dma_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static inline void dma_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ if (!dma_skip_sync(dev))
+ __dma_sync_single_for_device(dev, addr, size, dir);
+}
+
+static inline void dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg, int nelems, enum dma_data_direction dir)
+{
+ if (!dma_skip_sync(dev))
+ __dma_sync_sg_for_cpu(dev, sg, nelems, dir);
+}
+
+static inline void dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg, int nelems, enum dma_data_direction dir)
+{
+ if (!dma_skip_sync(dev))
+ __dma_sync_sg_for_device(dev, sg, nelems, dir);
+}
+
static inline void dma_sync_single_range_for_cpu(struct device *dev,
dma_addr_t addr, unsigned long offset, size_t size,
enum dma_data_direction dir)
{
- return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+ if (!dma_skip_sync(dev))
+ __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
}

static inline void dma_sync_single_range_for_device(struct device *dev,
dma_addr_t addr, unsigned long offset, size_t size,
enum dma_data_direction dir)
{
- return dma_sync_single_for_device(dev, addr + offset, size, dir);
+ if (!dma_skip_sync(dev))
+ __dma_sync_single_for_device(dev, addr + offset, size, dir);
}

/**
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58db8fd70471..85feaa0e008c 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -329,7 +329,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
}
EXPORT_SYMBOL(dma_unmap_resource);

-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -341,9 +341,9 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
ops->sync_single_for_cpu(dev, addr, size, dir);
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
+EXPORT_SYMBOL(__dma_sync_single_for_cpu);

-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -355,9 +355,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
ops->sync_single_for_device(dev, addr, size, dir);
debug_dma_sync_single_for_device(dev, addr, size, dir);
}
-EXPORT_SYMBOL(dma_sync_single_for_device);
+EXPORT_SYMBOL(__dma_sync_single_for_device);

-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -369,9 +369,9 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(__dma_sync_sg_for_cpu);

-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -383,7 +383,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
ops->sync_sg_for_device(dev, sg, nelems, dir);
debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
}
-EXPORT_SYMBOL(dma_sync_sg_for_device);
+EXPORT_SYMBOL(__dma_sync_sg_for_device);

/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
@@ -841,7 +841,7 @@ size_t dma_opt_mapping_size(struct device *dev)
}
EXPORT_SYMBOL_GPL(dma_opt_mapping_size);

-bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

@@ -849,7 +849,7 @@ bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
return dma_direct_need_sync(dev, dma_addr);
return ops->sync_single_for_cpu || ops->sync_single_for_device;
}
-EXPORT_SYMBOL_GPL(dma_need_sync);
+EXPORT_SYMBOL_GPL(__dma_need_sync);

unsigned long dma_get_merge_boundary(struct device *dev)
{
--
2.43.0
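
To make the intended effect concrete, here is a minimal sketch (not part of
the patch; the driver function and names are made up) of what happens at a
call site once the wrappers above are in place:

#include <linux/dma-mapping.h>

/* Hypothetical driver Rx-refill path, for illustration only. */
static void foo_rx_push_buffer(struct device *dev, dma_addr_t buf_dma,
			       size_t len)
{
	/*
	 * With CONFIG_DMA_NEED_SYNC=n, dma_skip_sync() is constant true,
	 * the branch folds away and no call to
	 * __dma_sync_single_for_device() is emitted. With the symbol set,
	 * the out-of-line sync is called exactly as before the patch.
	 */
	dma_sync_single_for_device(dev, buf_dma, len, DMA_FROM_DEVICE);
}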



2024-02-14 17:28:05

by Robin Murphy

Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
> Some platforms do have DMA, but DMA there is always direct and coherent.
> Currently, even on such platforms DMA sync operations are compiled and
> called.
> Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
> either sync operations are needed, or DMA ops or swiotlb are enabled.
> Define dma_need_sync() and dma_skip_sync() depending on this symbol's
> state and don't call the sync ops when dma_skip_sync() is true.
> The change allows for future optimizations of DMA sync calls depending
> on compile-time or runtime conditions.
>
> Signed-off-by: Alexander Lobakin <[email protected]>
> ---
> kernel/dma/Kconfig | 4 ++
> include/linux/dma-mapping.h | 80 +++++++++++++++++++++++++++++++------
> kernel/dma/mapping.c | 20 +++++-----
> 3 files changed, 81 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index d62f5957f36b..1c9ff05b1ecb 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
> bool
> depends on SWIOTLB
>
> +config DMA_NEED_SYNC
> + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
> + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB

I'm not sure DMA_OPS belongs here - several architectures have
non-trivial ops without syncs, e.g. Alpha.

> +
> config DMA_RESTRICTED_POOL
> bool "DMA Restricted Pool"
> depends on OF && OF_RESERVED_MEM && SWIOTLB
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4a658de44ee9..6c7640441214 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -117,13 +117,13 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
> size_t size, enum dma_data_direction dir, unsigned long attrs);
> void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
> enum dma_data_direction dir, unsigned long attrs);
> -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
> +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
> enum dma_data_direction dir);
> -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
> +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
> size_t size, enum dma_data_direction dir);
> -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> int nelems, enum dma_data_direction dir);
> -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> int nelems, enum dma_data_direction dir);
> void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
> gfp_t flag, unsigned long attrs);
> @@ -147,7 +147,7 @@ u64 dma_get_required_mask(struct device *dev);
> bool dma_addressing_limited(struct device *dev);
> size_t dma_max_mapping_size(struct device *dev);
> size_t dma_opt_mapping_size(struct device *dev);
> -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> unsigned long dma_get_merge_boundary(struct device *dev);
> struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
> enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
> @@ -195,19 +195,19 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
> size_t size, enum dma_data_direction dir, unsigned long attrs)
> {
> }
> -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> - size_t size, enum dma_data_direction dir)
> +static inline void __dma_sync_single_for_cpu(struct device *dev,
> + dma_addr_t addr, size_t size, enum dma_data_direction dir)

To me it would feel more logical to put all the wrappers inside the
#ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
constant false for !HAS_DMA, then we could also just make the external
function declarations unconditional and remove the stubs. Not a critical
matter though, and I defer to whatever Christoph thinks is most
maintainable.

> {
> }
> -static inline void dma_sync_single_for_device(struct device *dev,
> +static inline void __dma_sync_single_for_device(struct device *dev,
> dma_addr_t addr, size_t size, enum dma_data_direction dir)
> {
> }
> -static inline void dma_sync_sg_for_cpu(struct device *dev,
> +static inline void __dma_sync_sg_for_cpu(struct device *dev,
> struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> {
> }
> -static inline void dma_sync_sg_for_device(struct device *dev,
> +static inline void __dma_sync_sg_for_device(struct device *dev,
> struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> {
> }
> @@ -277,7 +277,7 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
> {
> return 0;
> }
> -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +static inline bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> {
> return false;
> }
> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
> return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
> }
>
> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
> + dma_addr_t addr, unsigned long offset, size_t size,
> + enum dma_data_direction dir)
> +{
> + __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> +}
> +
> +static inline void __dma_sync_single_range_for_device(struct device *dev,
> + dma_addr_t addr, unsigned long offset, size_t size,
> + enum dma_data_direction dir)
> +{
> + __dma_sync_single_for_device(dev, addr + offset, size, dir);
> +}

There is no need to introduce these two.

> +
> +static inline bool dma_skip_sync(const struct device *dev)
> +{
> + return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
> +}
> +
> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +{
> + return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
> +}

That's a bit of a mind-bender... is it actually just

return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);

?

(I do still think the negative flag makes it all a little harder to
follow in general than a positive "device needs to consider syncs" flag
would.)

> +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> + size_t size, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_cpu(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_single_for_device(struct device *dev,
> + dma_addr_t addr, size_t size, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_device(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_sg_for_cpu(struct device *dev,
> + struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_sg_for_cpu(dev, sg, nelems, dir);
> +}
> +
> +static inline void dma_sync_sg_for_device(struct device *dev,
> + struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_sg_for_device(dev, sg, nelems, dir);
> +}
> +
> static inline void dma_sync_single_range_for_cpu(struct device *dev,
> dma_addr_t addr, unsigned long offset, size_t size,
> enum dma_data_direction dir)
> {
> - return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> }
>
> static inline void dma_sync_single_range_for_device(struct device *dev,
> dma_addr_t addr, unsigned long offset, size_t size,
> enum dma_data_direction dir)
> {
> - return dma_sync_single_for_device(dev, addr + offset, size, dir);
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_device(dev, addr + offset, size, dir);
> }

These two don't need changing either, since the dma_sync_single_*
wrappers have already taken care of it.

Thanks,
Robin.
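
For reference, the positive-sense variant Robin alludes to could look roughly
like the sketch below; the dma_dev_need_sync() name is an assumption for
illustration and does not appear in the posted patch (at this point in the
series it would only reflect the compile-time symbol):

/* Hypothetical positive-sense helper; name and shape are assumptions. */
static inline bool dma_dev_need_sync(const struct device *dev)
{
	return IS_ENABLED(CONFIG_DMA_NEED_SYNC);
}

static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
	/* Robin's simplification, expressed with the positive helper. */
	return dma_dev_need_sync(dev) && __dma_need_sync(dev, dma_addr);
}

static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir)
{
	if (dma_dev_need_sync(dev))
		__dma_sync_single_for_cpu(dev, addr, size, dir);
}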

2024-02-14 18:12:39

by Robin Murphy

Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
[...]
> +static inline bool dma_skip_sync(const struct device *dev)
> +{
> + return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
> +}

One more thing, could we please also make this conditional on
!CONFIG_DMA_API_DEBUG so that that doesn't lose coverage for validating
syncs?

Thanks,
Robin.
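
A sketch of what is being asked for, assuming the check stays purely
compile-time at this stage: never skip when dma-debug is built in, so
debug_dma_sync_*() keeps seeing every sync.

static inline bool dma_skip_sync(const struct device *dev)
{
	/* Keep the sync paths (and their dma-debug checks) when debugging. */
	if (IS_ENABLED(CONFIG_DMA_API_DEBUG))
		return false;

	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
}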

2024-02-15 05:07:27

by Christoph Hellwig

Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

On Wed, Feb 14, 2024 at 06:09:08PM +0000, Robin Murphy wrote:
> On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
> [...]
>> +static inline bool dma_skip_sync(const struct device *dev)
>> +{
>> + return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
>> +}
>
> One more thing, could we please also make this conditional on
> !CONFIG_DMA_API_DEBUG so that that doesn't lose coverage for validating
> syncs?

Agreed.

2024-02-15 05:21:00

by Christoph Hellwig

Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

On Wed, Feb 14, 2024 at 05:20:50PM +0000, Robin Murphy wrote:
>> +config DMA_NEED_SYNC
>> + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
>> + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
>
> I'm not sure DMA_OPS belongs here - several architectures have non-trivial
> ops without syncs, e.g. Alpha.

True, but peeking through the ops is a bit hard. And I don't think it's
worth optimizing the dma sync performance on Alpha :)

>> +static inline void __dma_sync_single_for_cpu(struct device *dev,
>> + dma_addr_t addr, size_t size, enum dma_data_direction dir)
>
> To me it would feel more logical to put all the wrappers inside the #ifdef
> CONFIG_HAS_DMA and not touch these stubs at all (what does it mean to skip
> an inline no-op?). Or in fact, if dma_skip_sync() is constant false for
> !HAS_DMA, then we could also just make the external function declarations
> unconditional and remove the stubs. Not a critical matter though, and I
> defer to whatever Christoph thinks is most maintainable.

Your idea sounds reasonable to me, but I don't have a strong preference.

>> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>> +{
>> + return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
>> +}
>
> That's a bit of a mind-bender... is it actually just
>
> return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);
>
> ?

That looks a lot more readable for sure.

> (I do still think the negative flag makes it all a little harder to follow
> in general than a positive "device needs to consider syncs" flag would.)

Probably.


2024-02-19 12:54:10

by Alexander Lobakin

Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

From: Robin Murphy <[email protected]>
Date: Wed, 14 Feb 2024 17:20:50 +0000

> On 2024-02-14 4:21 pm, Alexander Lobakin wrote:

[...]

>> -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
>> -        size_t size, enum dma_data_direction dir)
>> +static inline void __dma_sync_single_for_cpu(struct device *dev,
>> +        dma_addr_t addr, size_t size, enum dma_data_direction dir)
>
> To me it would feel more logical to put all the wrappers inside the
> #ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
> mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
> constant false for !HAS_DMA, then we could also just make the external
> function declarations unconditional and remove the stubs. Not a critical
> matter though, and I defer to whatever Christoph thinks is most
> maintainable.

It's done like that because I'm adding a runtime check in the second
patch. I don't think touching this twice makes sense.

[...]

>> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
>>       return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>>   }
>>
>> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
>> +        dma_addr_t addr, unsigned long offset, size_t size,
>> +        enum dma_data_direction dir)
>> +{
>> +    __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
>> +}
>> +
>> +static inline void __dma_sync_single_range_for_device(struct device *dev,
>> +        dma_addr_t addr, unsigned long offset, size_t size,
>> +        enum dma_data_direction dir)
>> +{
>> +    __dma_sync_single_for_device(dev, addr + offset, size, dir);
>> +}
>
> There is no need to introduce these two.

I already replied to this in the previous thread. Some subsystems may want
to check for the shortcut earlier to avoid call ladders of their own
functions. See patch 6, for example, where I use this one.
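
Roughly the usage pattern being described, with made-up names: the subsystem
resolves the skip decision once per mapping and then calls the
double-underscore range helper directly from its own inner function, avoiding
a per-buffer branch and an extra wrapper hop.

/* Hypothetical subsystem-internal helper; names are illustrative only. */
struct foo_pool {
	struct device *dev;
	bool dma_sync;		/* cached from dma_need_sync() at setup */
};

static void foo_pool_sync_for_device(const struct foo_pool *pool,
				     dma_addr_t dma, u32 offset, u32 len)
{
	if (!pool->dma_sync)
		return;

	__dma_sync_single_range_for_device(pool->dev, dma, offset, len,
					   DMA_BIDIRECTIONAL);
}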

>
>> +
>> +static inline bool dma_skip_sync(const struct device *dev)
>> +{
>> +    return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
>> +}
>> +
>> +
>> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>> +{
>> +    return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
>> +}
>
> That's a bit of a mind-bender... is it actually just
>
>     return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);

Oh, indeed ._.

>
> ?
>
> (I do still think the negative flag makes it all a little harder to
> follow in general than a positive "device needs to consider syncs" flag
> would.)

I think it was that way in Eric's original idea and I kept it.
I'm fine with inverting it.

[...]

> Thanks,
> Robin.

Thanks,
Olek

2024-02-26 16:27:55

by Robin Murphy

Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used

On 19/02/2024 12:53 pm, Alexander Lobakin wrote:
> From: Robin Murphy <[email protected]>
> Date: Wed, 14 Feb 2024 17:20:50 +0000
>
>> On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
>
> [...]
>
>>> -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
>>> -        size_t size, enum dma_data_direction dir)
>>> +static inline void __dma_sync_single_for_cpu(struct device *dev,
>>> +        dma_addr_t addr, size_t size, enum dma_data_direction dir)
>>
>> To me it would feel more logical to put all the wrappers inside the
>> #ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
>> mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
>> constant false for !HAS_DMA, then we could also just make the external
>> function declarations unconditional and remove the stubs. Not a critical
>> matter though, and I defer to whatever Christoph thinks is most
>> maintainable.
>
> It's done like that because I'm adding a runtime check in the second
> patch. I don't think touching this twice makes sense.

Huh? Why would anything need touching twice? All I'm saying is that it's
pretty pointless to add any invocations of dma_skip_sync() in !HAS_DMA
paths where we already know the whole API is stubbed out anyway. The
only cases which are worth differentiating here are HAS_DMA +
DMA_NEED_SYNC vs. HAS_DMA + !DMA_NEED_SYNC (with the subsequent runtime
check then just subdividing the former).
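
For concreteness, a rough sketch of that arrangement (shown for only one of
the helpers): the dma_skip_sync() check exists only under CONFIG_HAS_DMA,
while the !HAS_DMA stub keeps its public name and stays a plain no-op.

#ifdef CONFIG_HAS_DMA
void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir);

static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir)
{
	if (!dma_skip_sync(dev))
		__dma_sync_single_for_cpu(dev, addr, size, dir);
}
#else /* !CONFIG_HAS_DMA */
/* Untouched no-op stub under its original public name. */
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir)
{
}
#endif /* CONFIG_HAS_DMA */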

>
> [...]
>
>>> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
>>>       return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>>>   }
>>>
>>> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
>>> +        dma_addr_t addr, unsigned long offset, size_t size,
>>> +        enum dma_data_direction dir)
>>> +{
>>> +    __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
>>> +}
>>> +
>>> +static inline void __dma_sync_single_range_for_device(struct device *dev,
>>> +        dma_addr_t addr, unsigned long offset, size_t size,
>>> +        enum dma_data_direction dir)
>>> +{
>>> +    __dma_sync_single_for_device(dev, addr + offset, size, dir);
>>> +}
>>
>> There is no need to introduce these two.
>
> I already replied to this in the previous thread. Some subsystems may want
> to check for the shortcut earlier to avoid call ladders of their own
> functions. See patch 6, for example, where I use this one.

Ugh, no. If the page pool code wants to be clever poking around and
sidestepping parts of the documented API, it can flippin' well open-code
a single addition to call __dma_sync_single_for_device() directly
itself. I'm not at all keen on having to maintain "common" APIs for such
niche trickery.

Thanks,
Robin.
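
The open-coded alternative Robin has in mind amounts to a single addition at
the call site, in place of the __dma_sync_single_range_for_device() wrapper;
a sketch with assumed names:

/* Hypothetical page_pool-style call site; variable names are assumptions. */
if (pool->dma_sync)
	__dma_sync_single_for_device(pool->dev, dma_addr + offset, len,
				     DMA_BIDIRECTIONAL);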