Add the capability to recycle rx buffers if they are not consumed by the
networking stack.
This series is based on 'mt76: add size check for additional rx fragments' series:
https://patchwork.kernel.org/patch/10709453/
Lorenzo Bianconi (2):
mt76: dma: do not build skb if reported len does not fit in buf_size
mt76: dma: add rx buffer recycle support
drivers/net/wireless/mediatek/mt76/dma.c | 75 +++++++++++++++++++----
drivers/net/wireless/mediatek/mt76/mt76.h | 3 +
2 files changed, 67 insertions(+), 11 deletions(-)
--
2.19.2
Precompute the data length in order to avoid allocating the related
skb data structure if the reported length does not fit in the queue buf_size
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt76/dma.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index b7fd2e110663..1db163c40dcf 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -417,10 +417,9 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
static int
mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
{
+ int len, data_len, done = 0;
struct sk_buff *skb;
unsigned char *data;
- int len;
- int done = 0;
bool more;
while (done < budget) {
@@ -430,7 +429,12 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
if (!data)
break;
- if (q->buf_size < len + q->buf_offset) {
+ if (q->rx_head)
+ data_len = q->buf_size;
+ else
+ data_len = SKB_WITH_OVERHEAD(q->buf_size);
+
+ if (data_len < len + q->buf_offset) {
dev_kfree_skb(q->rx_head);
q->rx_head = NULL;
@@ -448,12 +452,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
skb_free_frag(data);
continue;
}
-
skb_reserve(skb, q->buf_offset);
- if (skb->tail + len > skb->end) {
- dev_kfree_skb(skb);
- continue;
- }
if (q == &dev->q_rx[MT_RXQ_MCU]) {
u32 *rxfce = (u32 *) skb->cb;
--
2.19.2
Add support for recycling rx buffers if they are not forwarded
to the network stack, instead of reallocating them from scratch
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt76/dma.c | 60 +++++++++++++++++++++--
drivers/net/wireless/mediatek/mt76/mt76.h | 3 ++
2 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 1db163c40dcf..eceadfa3f980 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -39,6 +39,15 @@ mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q)
if (!q->entry)
return -ENOMEM;
+ /* allocate recycle buffer ring */
+ if (q == &dev->q_rx[MT_RXQ_MCU] ||
+ q == &dev->q_rx[MT_RXQ_MAIN]) {
+ size = q->ndesc * sizeof(*q->recycle);
+ q->recycle = devm_kzalloc(dev->dev, size, GFP_KERNEL);
+ if (!q->recycle)
+ return -ENOMEM;
+ }
+
/* clear descriptors */
for (i = 0; i < q->ndesc; i++)
q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
@@ -317,6 +326,49 @@ int mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
}
EXPORT_SYMBOL_GPL(mt76_dma_tx_queue_skb);
+/* caller must hold mt76_queue spinlock */
+static u8 *mt76_dma_get_free_buf(struct mt76_queue *q, bool flush)
+{
+ if (q->recycle[q->rhead] || flush) {
+ u8 *buff = q->recycle[q->rhead];
+
+ q->recycle[q->rhead] = NULL;
+ q->rhead = (q->rhead + 1) % q->ndesc;
+ return buff;
+ }
+
+ return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+}
+
+static void
+mt76_dma_set_recycle_buf(struct mt76_queue *q, u8 *data)
+{
+ spin_lock_bh(&q->lock);
+ if (!q->recycle[q->rtail]) {
+ q->recycle[q->rtail] = data;
+ q->rtail = (q->rtail + 1) % q->ndesc;
+ } else {
+ skb_free_frag(data);
+ }
+ spin_unlock_bh(&q->lock);
+}
+
+static void
+mt76_dma_free_recycle_ring(struct mt76_queue *q)
+{
+ u8 *buf;
+
+ spin_lock_bh(&q->lock);
+ while (true) {
+ buf = mt76_dma_get_free_buf(q, true);
+ if (!buf)
+ break;
+
+ skb_free_frag(buf);
+ }
+ spin_unlock_bh(&q->lock);
+}
+
static int
mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
{
@@ -332,7 +384,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
while (q->queued < q->ndesc - 1) {
struct mt76_queue_buf qbuf;
- buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+ buf = mt76_dma_get_free_buf(q, false);
if (!buf)
break;
@@ -373,6 +425,8 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
} while (1);
spin_unlock_bh(&q->lock);
+ mt76_dma_free_recycle_ring(q);
+
if (!q->rx_page.va)
return;
@@ -438,7 +492,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
dev_kfree_skb(q->rx_head);
q->rx_head = NULL;
- skb_free_frag(data);
+ mt76_dma_set_recycle_buf(q, data);
continue;
}
@@ -449,7 +503,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
skb = build_skb(data, q->buf_size);
if (!skb) {
- skb_free_frag(data);
+ mt76_dma_set_recycle_buf(q, data);
continue;
}
skb_reserve(skb, q->buf_offset);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 5cd508a68609..95546c744494 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -114,6 +114,9 @@ struct mt76_queue {
spinlock_t lock;
struct mt76_queue_entry *entry;
struct mt76_desc *desc;
+ /* recycle ring */
+ u16 rhead, rtail;
+ u8 **recycle;
struct list_head swq;
int swq_queued;
--
2.19.2
>
> Add support for recycling rx buffers if they are not forwarded
> to network stack instead of reallocate them from scratch
>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
Felix,
could you please drop this patch since it does not help to reduce pressure
on page_frag_cache.
Regards,
Lorenzo
On Wed, Dec 05, 2018 at 11:37:33AM +0100, Lorenzo Bianconi wrote:
> >
> > Add support for recycling rx buffers if they are not forwarded
> > to network stack instead of reallocate them from scratch
> >
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
>
> Felix,
>
> could you please drop this patch since it does not help to reduce pressure
> on page_frag_cache.
What is the problem ? Maybe using kmalloc() instead of page_frag_alloc()
could help (kmalloc has standard kmem_cache for 2048 bytes object) ?
Thanks
Stanislaw
> On Wed, Dec 05, 2018 at 11:37:33AM +0100, Lorenzo Bianconi wrote:
> > >
> > > Add support for recycling rx buffers if they are not forwarded
> > > to network stack instead of reallocate them from scratch
> > >
> > > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > > ---
> >
> > Felix,
> >
> > could you please drop this patch since it does not help to reduce pressure
> > on page_frag_cache.
>
> What is the problem ? Maybe using kmalloc() instead of page_frag_alloc()
> could help (kmalloc has standard kmem_cache for 2048 bytes object) ?
Hi Stanislaw,
I think the only difference in using a recycle buffer with page_frag_cache is
we are a little bit less greedy in consuming the compound page since in case of
error we will reuse the previously allocated fragment. However we will need to
reallocate a new compound page if we have a leftover fragment that 'locks'
the previous compound (we have the same issue if we do not use the recycle
buffer). Is this 'little' improvement worth the more complex code?
Do you agree or is there something I am missing here?
Regards,
Lorenzo
>
> Thanks
> Stanislaw
On Wed, Dec 05, 2018 at 04:17:31PM +0100, Lorenzo Bianconi wrote:
> > On Wed, Dec 05, 2018 at 11:37:33AM +0100, Lorenzo Bianconi wrote:
> > > >
> > > > Add support for recycling rx buffers if they are not forwarded
> > > > to network stack instead of reallocate them from scratch
> > > >
> > > > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > > > ---
> > >
> > > Felix,
> > >
> > > could you please drop this patch since it does not help to reduce pressure
> > > on page_frag_cache.
> >
> > What is the problem ? Maybe using kmalloc() instead of page_frag_alloc()
> > could help (kmalloc has standard kmem_cache for 2048 bytes object) ?
>
> Hi Stanislaw,
>
> I think the only difference in using a recycle buffer with page_frag_cache is
> we are a little bit less greedy in consuming the compound page since in case of
> error we will reuse the previously allocated fragment. However we will need to
> reallocate a new compound page if we have a leftover fragment that 'locks'
> the previous compound (we have the same issue if we do not use the recycle
> buffer). Is this 'little' improvement worth the more complex code?
> Do you agree or is there something I am missing here?
I was not asking about the patch. I agree it should be dropped.
I was asking what is the problem with "pressure on page_frag_cache" and if
using kmalloc() instead of page_frag_alloc() would be a potential solution.
Regards
Stanislaw
>
> Precompute data length in order to avoid to allocate the related
> skb data structure if reported length does not fit in queue buf_size
>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/dma.c | 15 +++++++--------
> 1 file changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> index b7fd2e110663..1db163c40dcf 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -417,10 +417,9 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
> static int
> mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
> {
> + int len, data_len, done = 0;
> struct sk_buff *skb;
> unsigned char *data;
> - int len;
> - int done = 0;
> bool more;
>
> while (done < budget) {
> @@ -430,7 +429,12 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
> if (!data)
> break;
>
> - if (q->buf_size < len + q->buf_offset) {
> + if (q->rx_head)
> + data_len = q->buf_size;
> + else
> + data_len = SKB_WITH_OVERHEAD(q->buf_size);
> +
> + if (data_len < len + q->buf_offset) {
> dev_kfree_skb(q->rx_head);
> q->rx_head = NULL;
>
> @@ -448,12 +452,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
> skb_free_frag(data);
> continue;
> }
> -
> skb_reserve(skb, q->buf_offset);
> - if (skb->tail + len > skb->end) {
> - dev_kfree_skb(skb);
> - continue;
> - }
>
> if (q == &dev->q_rx[MT_RXQ_MCU]) {
> u32 *rxfce = (u32 *) skb->cb;
> --
> 2.19.2
>
Hi Felix,
what about this patch?
Regards,
Lorenzo
On 2018-12-03 15:34, Lorenzo Bianconi wrote:
> Precompute data length in order to avoid to allocate the related
> skb data structure if reported length does not fit in queue buf_size
>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
Applied, thanks.
- Felix