According to Documentation/DMA-API.txt:
Warnings: Memory coherency operates at a granularity called the cache
line width. In order for memory mapped by this API to operate
correctly, the mapped region must begin exactly on a cache line
boundary and end exactly on one (to prevent two separately mapped
regions from sharing a single cache line). Since the cache line size
may not be known at compile time, the API will not enforce this
requirement. Therefore, it is recommended that driver writers who
don't take special care to determine the cache line size at run time
only map virtual regions that begin and end on page boundaries (which
are guaranteed also to be cache line boundaries).
On some systems where DMA is non-coherent and requires software
writeback / invalidate of the caches, we must ensure that
dma_(un)map_single is called with a cacheline aligned buffer and a
length of a whole number of cachelines.
To address the alignment requirements of DMA buffers, keep a separate
entry in stmmac_rx_queue for the aligned skbuff head. Use this for
dma_map_single, such that the address meets the cacheline alignment
requirements. Use skb_headroom() to convert between rx_skbuff_head, the
aligned head of the buffer, and the packet data, rx_skbuff_dma.
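In outline (error handling omitted; names as in the diff below), the
conversion is:

	/* Map from the cacheline-aligned skb->head, not from skb->data. */
	rx_q->rx_skbuff_head[i] = dma_map_single(priv->device, skb->head,
						 skb_headroom(skb) +
						 priv->dma_buf_sz,
						 DMA_FROM_DEVICE);
	/* Descriptors still take the packet data address, which is
	 * skb_headroom(skb) bytes into the mapped region. */
	rx_q->rx_skbuff_dma[i] = rx_q->rx_skbuff_head[i] + skb_headroom(skb);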
Tested on a Creator Ci40 with Pistachio SoC.
Signed-off-by: Matt Redfearn <[email protected]>
---
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 50 ++++++++++++++++-------
2 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index a916e13624eb..dd26a724dee7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -67,6 +67,7 @@ struct stmmac_rx_queue {
struct dma_desc *dma_rx ____cacheline_aligned_in_smp;
struct sk_buff **rx_skbuff;
dma_addr_t *rx_skbuff_dma;
+ dma_addr_t *rx_skbuff_head;
unsigned int cur_rx;
unsigned int dirty_rx;
u32 rx_zeroc_thresh;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1763e48c84e2..da68eeff2a1c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1132,14 +1132,16 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
return -ENOMEM;
}
rx_q->rx_skbuff[i] = skb;
- rx_q->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
- priv->dma_buf_sz,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(priv->device, rx_q->rx_skbuff_dma[i])) {
+ rx_q->rx_skbuff_head[i] = dma_map_single(priv->device, skb->head,
+ skb_headroom(skb) +
+ priv->dma_buf_sz,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(priv->device, rx_q->rx_skbuff_head[i])) {
netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
dev_kfree_skb_any(skb);
return -EINVAL;
}
+ rx_q->rx_skbuff_dma[i] = rx_q->rx_skbuff_head[i] + skb_headroom(skb);
if (priv->synopsys_id >= DWMAC_CORE_4_00)
p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
@@ -1164,7 +1166,8 @@ static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i)
struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
if (rx_q->rx_skbuff[i]) {
- dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[i],
+ dma_unmap_single(priv->device, rx_q->rx_skbuff_head[i],
+ skb_headroom(rx_q->rx_skbuff[i]) +
priv->dma_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb_any(rx_q->rx_skbuff[i]);
}
@@ -1438,6 +1441,7 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
rx_q->dma_erx, rx_q->dma_rx_phy);
kfree(rx_q->rx_skbuff_dma);
+ kfree(rx_q->rx_skbuff_head);
kfree(rx_q->rx_skbuff);
}
}
@@ -1500,6 +1504,12 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
if (!rx_q->rx_skbuff_dma)
goto err_dma;
+ rx_q->rx_skbuff_head = kmalloc_array(DMA_RX_SIZE,
+ sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (!rx_q->rx_skbuff_head)
+ goto err_dma;
+
rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE,
sizeof(struct sk_buff *),
GFP_KERNEL);
@@ -3225,15 +3235,18 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
}
rx_q->rx_skbuff[entry] = skb;
- rx_q->rx_skbuff_dma[entry] =
- dma_map_single(priv->device, skb->data, bfsize,
+ rx_q->rx_skbuff_head[entry] =
+ dma_map_single(priv->device, skb->head,
+ skb_headroom(skb) + bfsize,
DMA_FROM_DEVICE);
if (dma_mapping_error(priv->device,
- rx_q->rx_skbuff_dma[entry])) {
+ rx_q->rx_skbuff_head[entry])) {
netdev_err(priv->dev, "Rx DMA map failed\n");
dev_kfree_skb(skb);
break;
}
+ rx_q->rx_skbuff_dma[entry] = rx_q->rx_skbuff_head[entry]
+ + skb_headroom(skb);
if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
@@ -3333,10 +3346,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
* them in stmmac_rx_refill() function so that
* device can reuse it.
*/
+ int head = skb_headroom(rx_q->rx_skbuff[entry]);
+
rx_q->rx_skbuff[entry] = NULL;
dma_unmap_single(priv->device,
- rx_q->rx_skbuff_dma[entry],
- priv->dma_buf_sz,
+ rx_q->rx_skbuff_head[entry],
+ head + priv->dma_buf_sz,
DMA_FROM_DEVICE);
}
} else {
@@ -3384,6 +3399,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
if (unlikely(!priv->plat->has_gmac4 &&
((frame_len < priv->rx_copybreak) ||
stmmac_rx_threshold_count(rx_q)))) {
+ int total_len;
+
skb = netdev_alloc_skb_ip_align(priv->dev,
frame_len);
if (unlikely(!skb)) {
@@ -3394,9 +3411,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
break;
}
+ total_len = skb_headroom(skb) + frame_len;
+
dma_sync_single_for_cpu(priv->device,
- rx_q->rx_skbuff_dma
- [entry], frame_len,
+ rx_q->rx_skbuff_head
+ [entry], total_len,
DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb,
rx_q->
@@ -3405,8 +3424,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
skb_put(skb, frame_len);
dma_sync_single_for_device(priv->device,
- rx_q->rx_skbuff_dma
- [entry], frame_len,
+ rx_q->rx_skbuff_head
+ [entry], total_len,
DMA_FROM_DEVICE);
} else {
skb = rx_q->rx_skbuff[entry];
@@ -3423,7 +3442,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
skb_put(skb, frame_len);
dma_unmap_single(priv->device,
- rx_q->rx_skbuff_dma[entry],
+ rx_q->rx_skbuff_head[entry],
+ skb_headroom(skb) +
priv->dma_buf_sz,
DMA_FROM_DEVICE);
}
--
2.7.4
From: Matt Redfearn <[email protected]>
Date: Fri, 22 Sep 2017 12:13:53 +0100
> According to Documentation/DMA-API.txt:
> Warnings: Memory coherency operates at a granularity called the cache
> line width. In order for memory mapped by this API to operate
> correctly, the mapped region must begin exactly on a cache line
> boundary and end exactly on one (to prevent two separately mapped
> regions from sharing a single cache line). Since the cache line size
> may not be known at compile time, the API will not enforce this
> requirement. Therefore, it is recommended that driver writers who
> don't take special care to determine the cache line size at run time
> only map virtual regions that begin and end on page boundaries (which
> are guaranteed also to be cache line boundaries).
This is ridiculous. You're misreading what this document is trying
to explain.
As long as you use the dma_{map,unmap}_single() and sync to/from
device interfaces properly, the cacheline issues will be handled properly
and the cpu and the device will see proper up-to-date memory contents.
It is completely ridiculous to require every driver to stash away two
sets of pointers for every packet, and to DMA map the headroom of the SKB
which is wasteful.
I'm not applying this, fix this problem properly, thanks.
Hi David,
Thanks for your feedback.
On 23/09/17 02:26, David Miller wrote:
> From: Matt Redfearn <[email protected]>
> Date: Fri, 22 Sep 2017 12:13:53 +0100
>
>> According to Documentation/DMA-API.txt:
>> Warnings: Memory coherency operates at a granularity called the cache
>> line width. In order for memory mapped by this API to operate
>> correctly, the mapped region must begin exactly on a cache line
>> boundary and end exactly on one (to prevent two separately mapped
>> regions from sharing a single cache line). Since the cache line size
>> may not be known at compile time, the API will not enforce this
>> requirement. Therefore, it is recommended that driver writers who
>> don't take special care to determine the cache line size at run time
>> only map virtual regions that begin and end on page boundaries (which
>> are guaranteed also to be cache line boundaries).
> This is ridiculous. You're misreading what this document is trying
> to explain.
To be clear, is your interpretation of the documentation that drivers do
not have to ensure cacheline alignment?
As I read that documentation of the DMA API, it puts the onus on device
drivers to ensure that they operate on cacheline aligned & sized blocks
of memory. It states "the mapped region must begin exactly on a cache
line boundary". We know that skb->data is not cacheline aligned, since
it is skb_headroom() bytes into the skb buffer, and the value of
skb_headroom is not a multiple of cachelines. To give an example, on the
Creator Ci40 platform (a 32-bit MIPS platform), I have the following values:
skb_headroom(skb) = 130 bytes
skb->head = 0x8ed9b800 (32-byte cacheline aligned)
skb->data = 0x8ed9b882 (not cacheline aligned)
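As a quick illustration, assuming the 32-byte line size of this platform (a
driver would normally query dma_get_cache_alignment() rather than hard-code
it):

	#define EXAMPLE_LINE_SIZE	32	/* Ci40 L1 line size, example only */

	static inline bool example_line_aligned(const void *p)
	{
		return !((unsigned long)p & (EXAMPLE_LINE_SIZE - 1));
	}

	/* example_line_aligned((void *)0x8ed9b800) -> true  (skb->head)
	 * example_line_aligned((void *)0x8ed9b882) -> false (skb->data) */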
Since the MIPS architecture requires software cache management,
performing a dma_map_single(TO_DEVICE) will writeback and invalidate the
cachelines for the required region. To comply with (our interpretation
of) the DMA API documentation, the MIPS implementation expects a
cacheline aligned & sized buffer, so that it can writeback a set of
complete cachelines. Indeed a recent patch
(https://patchwork.linux-mips.org/patch/14624/) causes the MIPS dma
mapping code to emit warnings if the buffer it is requested to sync is
not cacheline aligned. This driver, as is, fails this test and causes
thousands of warnings to be emitted.
The situation for dma_map_single(FROM_DEVICE) is potentially worse,
since it will perform a cache invalidate of all lines for the buffer. If
the buffer is not cacheline aligned, this will throw away any adjacent
data in the same cacheline. The dma_map implementation has no way to
know if data adjacent to the buffer it is requested to sync is valuable
or not, and it will simply be thrown away by the cache invalidate.
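To make that concrete, here is a contrived sketch (not from any real driver;
error checking omitted; it assumes a non-coherent platform, a valid struct
device *dev and a kmalloc()ed ctx):

	struct rx_ctx {
		u32 flag;	/* CPU-owned, may share a cacheline with buf */
		u8  buf[256];	/* device-owned while mapped */
	};

	dma_addr_t d = dma_map_single(dev, ctx->buf, sizeof(ctx->buf),
				      DMA_FROM_DEVICE); /* invalidates whole lines */
	ctx->flag = 1;		/* dirties the line shared with buf[0..] */
	/* ... device writes into buf ... */
	dma_unmap_single(dev, d, sizeof(ctx->buf), DMA_FROM_DEVICE);

If the dirty shared line is written back (by eviction or by the cache
maintenance itself) it clobbers what the device wrote to the start of buf; if
it is invalidated, the CPU's update of flag is lost.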
If I am somehow misinterpreting the DMA API documentation, and drivers
are NOT required to ensure that buffers to be synced are cacheline
aligned, then there is only one way that the MIPS implementation can
ensure that it writeback / invalidates cachelines containing only the
requested data buffer, and that would be to use bounce buffers. Clearly
that will be devastating for performance as every outgoing packet will
need to be copied from its unaligned location to a guaranteed aligned
location, and written back from there. Similarly incoming packets would
have to somehow be initially located in a cacheline aligned location
before being copied into the non-cacheline aligned location supplied by
the driver.
> As long as you use the dma_{map,unmap}_single() and sync to/from
> device interfaces properly, the cacheline issues will be handled properly
> and the cpu and the device will see proper up-to-date memory contents.
I interpret the DMA API document (and the MIPS arch dma code operates
this way) as stating that the driver must ensure that buffers passed to
it are cacheline aligned, and cacheline sized, to prevent cache
management throwing away adjacent data in the same cacheline.
That is what this patch is for, to change the buffer address passed to
the DMA API to skb->head, which is (as far as I know) guaranteed to be
cacheline aligned.
>
> It is completely ridiculous to require every driver to stash away two
> sets of pointers for every packet, and to DMA map the headroom of the SKB
> which is wasteful.
>
> I'm not applying this, fix this problem properly, thanks.
An alternative, slightly less invasive change, may be to subtract
NET_IP_ALIGN from the dma buffer address, so that the buffer for which a
sync is requested is cacheline aligned (is that guaranteed?). Would that
change be more acceptable?
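Roughly, that alternative would look like this (sketch only - it helps only if
skb->data - NET_IP_ALIGN really lands on a cacheline boundary, which is the
open question above, and the matching unmap would need the same adjustment):

	dma_addr_t d;

	d = dma_map_single(priv->device, skb->data - NET_IP_ALIGN,
			   priv->dma_buf_sz + NET_IP_ALIGN,
			   DMA_FROM_DEVICE);
	rx_q->rx_skbuff_dma[i] = d + NET_IP_ALIGN;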
Thanks,
Matt
From: Matt Redfearn
> Sent: 26 September 2017 14:58
...
> > As long as you use the dma_{map,unmap}_single() and sync to/from
> > device interfaces properly, the cacheline issues will be handled properly
> > and the cpu and the device will see proper up-to-date memory contents.
>
> I interpret the DMA API document (and the MIPS arch dma code operates
> this way) as stating that the driver must ensure that buffers passed to
> it are cacheline aligned, and cacheline sized, to prevent cache
> management throwing away adjacent data in the same cacheline.
The important thing is that the driver must not dirty any cache lines
that are mapped for DMA (from the device).
Typically this is not a problem because the driver doesn't look
at skb (etc) that contain receive buffers once the dma is setup.
David
From: Matt Redfearn <[email protected]>
Date: Tue, 26 Sep 2017 14:57:39 +0100
> Since the MIPS architecture requires software cache management,
> performing a dma_map_single(TO_DEVICE) will writeback and invalidate
> the cachelines for the required region. To comply with (our
> interpretation of) the DMA API documentation, the MIPS implementation
> expects a cacheline aligned & sized buffer, so that it can writeback a
> set of complete cachelines. Indeed a recent patch
> (https://patchwork.linux-mips.org/patch/14624/) causes the MIPS dma
> mapping code to emit warnings if the buffer it is requested to sync is
> not cacheline aligned. This driver, as is, fails this test and causes
> thousands of warnings to be emitted.
For a device write, if the device does what it is told and does not
access bytes outside of the periphery of the DMA mapping, nothing bad
can happen.
When the DMA buffer is mapped, the cpu side cachelines are flushed (even
ones which are partially part of the requested mapping, this is _fine_).
The device writes into only the bytes it was given access to, which
starts at the DMA address.
The unmap and/or sync operation after the DMA transfer needs to do
nothing on the cpu side since the map operation flushed the cpu side
caches already.
When the cpu reads, it will pull the cacheline from main memory and
see what the device wrote.
It's not like the device can put "garbage" into the bytes earlier in
the cacheline it was given partial access to.
A device read is similar, the cpu cachelines are written out to main
memory at map time and the device sees what it needs to see.
I want to be shown a real example where corruption or bad data can
occur when the DMA API is used correctly.
Other platforms write "complete cachelines" in order to implement
the cpu and/or host controller parts of the DMA API implementation.
I see nothing special about MIPS at all.
If you're given a partial cache line, you align the addresses such
that you flush every cache line contained within the provided address
range.
I really don't see what the problem is and why MIPS needs special
handling. You will have to give specific examples, step by step,
where things can go wrong before I will be able to consider your
changes.
Thanks.
Hi David,
On Tuesday, 26 September 2017 10:34:00 PDT David Miller wrote:
> From: Matt Redfearn <[email protected]>
> Date: Tue, 26 Sep 2017 14:57:39 +0100
>
> > Since the MIPS architecture requires software cache management,
> > performing a dma_map_single(TO_DEVICE) will writeback and invalidate
> > the cachelines for the required region. To comply with (our
> > interpretation of) the DMA API documentation, the MIPS implementation
> > expects a cacheline aligned & sized buffer, so that it can writeback a
> > set of complete cachelines. Indeed a recent patch
> > (https://patchwork.linux-mips.org/patch/14624/) causes the MIPS dma
> > mapping code to emit warnings if the buffer it is requested to sync is
> > not cacheline aligned. This driver, as is, fails this test and causes
> > thousands of warnings to be emitted.
>
> For a device write, if the device does what it is told and does not
> access bytes outside of the periphery of the DMA mapping, nothing bad
> can happen.
>
> When the DMA buffer is mapped, the cpu side cachelines are flushed (even
> ones which are partially part of the requested mapping, this is _fine_).
This is not fine. Let's presume we writeback+invalidate the cache lines that
are only partially covered by the DMA mapping at its beginning or end, and
just invalidate all the lines that are wholly covered by the mapping. So far
so good.
> The device writes into only the bytes it was given access to, which
> starts at the DMA address.
OK - still fine but *only* if we don't write to anything that happens to be
part of the cache lines that are only partially covered by the DMA mapping &
make them dirty. If we do then any writeback of those lines will clobber data
the device wrote, and any invalidation of them will discard data the CPU
wrote.
How would you have us ensure that such writes don't occur?
This really does not feel to me like something to leave up to drivers without
any means of checking or enforcing correctness. The requirement to align DMA
mappings at cache-line boundaries, as outlined in Documentation/DMA-API.txt,
allows this to be enforced. If you want to ignore this requirement then there
is no clear way to verify that we aren't writing to cache lines involved in a
DMA transfer whilst the transfer is occurring, and no sane way to handle those
cache lines if we do.
> The unmap and/or sync operation after the DMA transfer needs to do
> nothing on the cpu side since the map operation flushed the cpu side
> caches already.
>
> When the cpu reads, it will pull the cacheline from main memory and
> see what the device wrote.
This is not true, because:
1) CPUs may speculatively read data. In between the invalidations that we did
earlier & the point at which the transfer completes, the CPU may have
speculatively read any arbitrary part of the memory mapped for DMA.
2) If we wrote to any lines that are only partially covered by the DMA mapping
then of course they're valid & dirty, and an access won't fetch from main
memory.
We therefore need to perform cache invalidation again at the end of the
transfer - on MIPS you can grep for cpu_needs_post_dma_flush to find this.
From a glance ARM has similar post-DMA invalidation in
__dma_page_dev_to_cpu(), ARM64 in __dma_unmap_area() etc etc.
At this point what would you have us do with cache lines that are only
partially covered by the DMA mapping? As above - if we writeback+invalidate we
risk clobbering data written by the device. If we just invalidate we risk
discarding data written by the CPU. And this is even ignoring the risk that a
writeback of one of these lines happens due to eviction during the DMA
transfer, which we have no control over at all.
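For reference, the dev-to-cpu step described above amounts to something like
this on a non-coherent architecture (a sketch, not the actual MIPS or ARM
code; arch_invalidate_dcache_range() is a placeholder name, not a real
kernel symbol):

	static void sketch_sync_for_cpu(phys_addr_t paddr, size_t size,
					enum dma_data_direction dir)
	{
		if (dir == DMA_TO_DEVICE)
			return;

		/* Discard any lines the CPU speculatively (re)filled while
		 * the device was writing. For lines only partially covered by
		 * the mapping there is no safe choice: invalidating may lose
		 * CPU data, writing back may clobber device data. */
		arch_invalidate_dcache_range(paddr, paddr + size);
	}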
> It's not like the device can put "garbage" into the bytes earlier in
> the cacheline it was given partial access to.
Right - but the CPU can "put garbage" into the bytes the device was given
access to, simply by fetching stale bytes from main memory before the device
overwrites them.
> I really don't see what the problem is and why MIPS needs special
> handling. You will have to give specific examples, step by step,
> where things can go wrong before I will be able to consider your
> changes.
MIPS doesn't need special handling - it just needs the driver to obey a
documented restriction when using the DMA API. One could argue that it ought
to be you who justifies why you feel networking drivers can ignore the
documentation, and that if you disagree with the documented API you should aim
to change it rather than ignore it.
Thanks,
Paul
From: Paul Burton <[email protected]>
Date: Tue, 26 Sep 2017 11:48:19 -0700
>> The device writes into only the bytes it was given access to, which
>> starts at the DMA address.
>
> OK - still fine but *only* if we don't write to anything that happens to be
> part of the cache lines that are only partially covered by the DMA mapping &
> make them dirty. If we do then any writeback of those lines will clobber data
> the device wrote, and any invalidation of them will discard data the CPU
> wrote.
>
> How would you have us ensure that such writes don't occur?
>
> This really does not feel to me like something to leave up to drivers without
> any means of checking or enforcing correctness. The requirement to align DMA
> mappings at cache-line boundaries, as outlined in Documentation/DMA-API.txt,
> allows this to be enforced. If you want to ignore this requirement then there
> is no clear way to verify that we aren't writing to cache lines involved in a
> DMA transfer whilst the transfer is occurring, and no sane way to handle those
> cache lines if we do.
The memory immediately before skb->data and immediately after
skb->data+skb->len will not be accessed. This is guaranteed.
I will request something exactly one more time to give you the benefit
of the doubt that you want to show me why this is _really_ a problem
and not a problem only in theory.
Please show me an exact sequence, with current code, in a current driver
that uses the DMA API properly, where corruption really can occur.
The new restriction is absolutely not reasonable for this use case.
It is furthermore impractical to require the 200+ drivers that use this
technique to allocate and map networking buffers to abide by this new
rule. Many platforms with even worse cache problems than MIPS handle
this just fine.
Thank you.
Hi David,
On Tuesday, 26 September 2017 13:33:09 PDT David Miller wrote:
> From: Paul Burton <[email protected]>
> Date: Tue, 26 Sep 2017 11:48:19 -0700
>
> >> The device writes into only the bytes it was given access to, which
> >> starts at the DMA address.
> >
> > OK - still fine but *only* if we don't write to anything that happens to
> > be
> > part of the cache lines that are only partially covered by the DMA mapping
> > & make them dirty. If we do then any writeback of those lines will
> > clobber data the device wrote, and any invalidation of them will discard
> > data the CPU wrote.
> >
> > How would you have us ensure that such writes don't occur?
> >
> > This really does not feel to me like something to leave up to drivers
> > without any means of checking or enforcing correctness. The requirement
> > to align DMA mappings at cache-line boundaries, as outlined in
> > Documentation/DMA-API.txt, allows this to be enforced. If you want to
> > ignore this requirement then there is no clear way to verify that we
> > aren't writing to cache lines involved in a DMA transfer whilst the
> > transfer is occurring, and no sane way to handle those cache lines if we
> > do.
>
> The memory immediately before skb->data and immediately after
> skb->data+skb->len will not be accessed. This is guaranteed.
This guarantee is not made known to the DMA API or the per-arch code backing
it, nor can I see it documented anywhere. It's good to hear that it exists in
some form though.
> I will request something exactly one more time to give you the benefit
> of the doubt that you want to show me why this is _really_ a problem
> and not a problem only in theory.
My previous message simply walked through your existing example & explained
why your assumptions can be incorrect as far as the DMA API & interactions
with caches go - I don't think that warrants the seemingly confrontational
tone.
> Please show me an exact sequence, with current code, in a current driver
> that uses the DMA API properly, where corruption really can occur.
How about this:
https://patchwork.kernel.org/patch/9423097/
Now this isn't networking code at fault, it was a problem with MMC. Given that
you say there's a guarantee that networking code won't write to cache lines
that partially include memory mapped for DMA, perhaps networking code is
immune to this issue (though at a minimum I think that guarantee ought to be
documented so that others know to keep it true).
The question remains though: what would you have us do to catch the broken
uses of the DMA API with non-cacheline-aligned memory? By not obeying
Documentation/DMA-API.txt you're preventing us from adding checks that catch
others who also disobey the alignment requirement in ways that are not fine,
and which result in otherwise difficult to track down memory corruption.
> The new restriction is absolutely not reasonable for this use case.
> It is furthermore impractical to require the 200+ drivers that use this
> technique to allocate and map networking buffers to abide by this new
> rule. Many platforms with even worse cache problems than MIPS handle
> this just fine.
>
> Thank you.
One disconnect here is that you describe a "new restriction" whilst the
restriction we're talking about has been documented in
Documentation/DMA-API.txt since at least the beginning of the git era - it is
not new at all. The only thing that changed for us is that I intend to warn
when the restriction is not met, because that often indicates genuine &
otherwise difficult to detect bugs such as the MMC one I linked to above. FYI
that warning has not gone into mainline yet anyway.
I'd suggest that at a minimum if you're unwilling to obey the API as described
in Documentation/DMA-API.txt then it would be beneficial if you could propose
a change to it such that it works for you, and perhaps we can extend the API &
its documentation to allow your usage whilst also allowing us to catch broken
uses.
Surely we can agree that the current situation wherein networking code clearly
disobeys the documented API is not a good one, and that allowing other broken
uses to slip by unnoticed except in rare difficult to track down corner cases
is not good either.
Thanks,
Paul
From: Paul Burton <[email protected]>
Date: Tue, 26 Sep 2017 14:30:33 -0700
> I'd suggest that at a minimum if you're unwilling to obey the API as
> described in Documentation/DMA-API.txt then it would be beneficial
> if you could propose a change to it such that it works for you, and
> perhaps we can extend the API & its documentation to allow your
> usage whilst also allowing us to catch broken uses.
The networking driver code works fine as is.
I also didn't write that ill-advised documentation in the DMA docs,
nor the non-merged new MIPS assertion.
So I'm trying to figure out on what basis I am required to do
anything.
Thank you.
Hi David,
On Tuesday, 26 September 2017 19:52:44 PDT David Miller wrote:
> From: Paul Burton <[email protected]>
> Date: Tue, 26 Sep 2017 14:30:33 -0700
>
> > I'd suggest that at a minimum if you're unwilling to obey the API as
> > described in Documentation/DMA-API.txt then it would be beneficial
> > if you could propose a change to it such that it works for you, and
> > perhaps we can extend the API & its documentation to allow your
> > usage whilst also allowing us to catch broken uses.
>
> The networking driver code works fine as is.
>
> I also didn't write that ill-advised documentation in the DMA docs,
> nor the non-merged new MIPS assertion.
>
> So I'm trying to figure out on what basis I am required to do
> anything.
>
> Thank you.
Nobody said you wrote the documentation, but you do maintain code which
disobeys the documented DMA API & now you're being an ass about it
unnecessarily.
Nobody said that you are required to do anything, I suggested that it would be
beneficial if you were to suggest a change to the documented DMA API such that
it allows your usage where it currently does not. If you don't want to have
any input into that, and you actually think that your current approach of
ignoring the documented API is the best path forwards, then we're probably
done here & I'll be making a note to avoid yourself & anything under net/ to
whatever extent is possible...
Thanks,
Paul
From: Paul Burton <[email protected]>
Date: Tue, 26 Sep 2017 21:30:56 -0700
> Nobody said that you are required to do anything, I suggested that
> it would be beneficial if you were to suggest a change to the
> documented DMA API such that it allows your usage where it currently
> does not.
Documentation is often wrong and it is here. What 200+ drivers
actually do and depend upon trumps a simple text document.
The requirement is that the memory remains quiescent on the cpu side
while the device messes with it. And that this quiescence requirement
may or may not be on a cache line basis.
There is absolutely no requirement that the buffers themselves are
cache line aligned.
In fact, receive buffers for networking are intentionally 2-byte
aligned in order for the ipv4 headers to be naturally 32-bit aligned.
Cache line aligning receive buffers will actually make some
architectures trap because of the bad alignment.
So see, this cache line alignment requirement is pure madness from
just about any perspective whatsoever.
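(For context, the 2-byte receive offset referred to here is NET_IP_ALIGN;
netdev_alloc_skb_ip_align(), which the stmmac driver above uses, is roughly
equivalent to the following, which is why skb->data ends up 2 bytes off any
larger alignment:

	/* 14-byte Ethernet header + 2 bytes of padding puts the IPv4 header
	 * on a 4-byte boundary. */
	skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
	if (likely(skb))
		skb_reserve(skb, NET_IP_ALIGN);
)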
Hi David,
On Tuesday, 26 September 2017 21:53:21 PDT David Miller wrote:
> From: Paul Burton <[email protected]>
> Date: Tue, 26 Sep 2017 21:30:56 -0700
>
> > Nobody said that you are required to do anything, I suggested that
> > it would be beneficial if you were to suggest a change to the
> > documented DMA API such that it allows your usage where it currently
> > does not.
>
> Documentation is often wrong and it is here. What 200+ drivers
> actually do and depend upon trumps a simple text document.
Agreed - but if the documented API is wrong then it should change.
> The requirement is that the memory remains quiescent on the cpu side
> while the device messes with it. And that this quiescence requirement
> may or may not be on a cache line basis.
>
> There is absolutely no requirement that the buffers themselves are
> cache line aligned.
>
> In fact, receive buffers for networking are intentionally 2-byte
> aligned in order for the ipv4 headers to be naturally 32-bit aligned.
>
> Cache line aligning receive buffers will actually make some
> architectures trap because of the bad alignment.
>
> So see, this cache line alignment requirement is pure madness from
> just about any perspective whatsoever.
Thank you - that's more constructive.
I understand that the network code doesn't suffer from a problem with using
non-cacheline-aligned buffers, because you guarantee that the CPU will not be
writing to anything on either side of the memory mapped for DMA up to at least
a cache line boundary. That is all well & good (though still, I think that
ought to be documented somewhere even if just by a comment somewhere in
linux/sk_buff.h).
There is still a problem though in other cases which do not provide such a
guarantee - for example the MMC issue I pointed out previously - which it
would be useful to be able to catch rather than allowing silent memory
corruption which can be difficult to track down. Whilst the particular case of
mapping a struct sk_buff's data for DMA might not trigger this issue, the issue
does still exist in other cases for which aligning data to a cache line
boundary is not always "pure madness".
So whilst it sounds like you'd happily just change or remove the paragraph
about cache-line alignment in Documentation/DMA-API.txt, and I agree that
would be a good start, I wonder whether we could do something better. One
option might be for the warning in the MIPS DMA code to be predicated on one
of the cache lines only partially covered by a DMA mapping actually being
dirty - though this would probably be a more expensive check than we'd want in
the general case so might have to be conditional upon some new debug entry in
Kconfig. I'll give this some thought.
Thanks,
Paul