Reallocate the skb if there is no enough space to manage the AMSDU rx packets.
Do not always copy the first part of received frames if A-MSDU is enabled
for SG capable devices
Changes since v1:
- do not allocate multiple page buffers but rely on fragmented skbs
if there is no enough space to manage the AMSDU rx packets
@Felix: do you prefer to take this series in your tree or is it better
to merge it in wireless-drivers?
Lorenzo Bianconi (2):
mt76: usb: fix rx A-MSDU support
mt76: usb: do not always copy the first part of received frames
drivers/net/wireless/mediatek/mt76/mt76.h | 4 ++
drivers/net/wireless/mediatek/mt76/usb.c | 64 ++++++++++++++++++-----
2 files changed, 55 insertions(+), 13 deletions(-)
--
2.21.0
Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
for rx") breaks A-MSDU support. When A-MSDU is enable the device can
receive frames up to q->buf_size but they will be discarded in
mt76u_process_rx_entry since there is no enough room for
skb_shared_info. Fix the issue reallocating the skb and copying in the
linear area the first 128B of the received frames and in the frag_list
the remaining part.
Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
drivers/net/wireless/mediatek/mt76/usb.c | 52 +++++++++++++++++++----
2 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 97a1296562d0..74d4edf941d6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -30,6 +30,7 @@
#define MT_TX_RING_SIZE 256
#define MT_MCU_RING_SIZE 32
#define MT_RX_BUF_SIZE 2048
+#define MT_SKB_HEAD_LEN 128
struct mt76_dev;
struct mt76_wcid;
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index bbaa1365bbda..2bfc8214c0d8 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -429,6 +429,48 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
return dma_len;
}
+static struct sk_buff *
+mt76u_build_rx_skb(u8 *data, int len, int buf_size,
+ int *nsgs)
+{
+ int data_len = min(len, MT_SKB_HEAD_LEN);
+ struct sk_buff *skb;
+
+ if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
+ /* fast path */
+ skb = build_skb(data, buf_size);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, MT_DMA_HDR_LEN);
+ __skb_put(skb, len);
+
+ return skb;
+ }
+
+ /* slow path, not enough space for data and
+ * skb_shared_info
+ */
+ skb = alloc_skb(data_len, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ skb_put_data(skb, data + MT_DMA_HDR_LEN, data_len);
+ data += (data_len + MT_DMA_HDR_LEN);
+ len -= data_len;
+ if (len > 0) {
+ struct page *page = virt_to_head_page(data);
+ int offset = data - (u8 *)page_address(page);
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ page, offset, len, buf_size);
+ } else {
+ *nsgs = 0;
+ }
+
+ return skb;
+}
+
static int
mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb)
{
@@ -446,19 +488,11 @@ mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb)
return 0;
data_len = min_t(int, len, data_len - MT_DMA_HDR_LEN);
- if (MT_DMA_HDR_LEN + data_len > SKB_WITH_OVERHEAD(q->buf_size)) {
- dev_err_ratelimited(dev->dev, "rx data too big %d\n", data_len);
- return 0;
- }
-
- skb = build_skb(data, q->buf_size);
+ skb = mt76u_build_rx_skb(data, data_len, q->buf_size, &nsgs);
if (!skb)
return 0;
- skb_reserve(skb, MT_DMA_HDR_LEN);
- __skb_put(skb, data_len);
len -= data_len;
-
while (len > 0 && nsgs < urb->num_sgs) {
data_len = min_t(int, len, urb->sg[nsgs].length);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
--
2.21.0
Set usb buffer size taking into account skb_shared_info in order to
not always copy the first part of received frames if A-MSDU is enabled
for SG capable devices
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 3 +++
drivers/net/wireless/mediatek/mt76/usb.c | 12 ++++++++----
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 74d4edf941d6..7899e9b88b54 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -32,6 +32,9 @@
#define MT_RX_BUF_SIZE 2048
#define MT_SKB_HEAD_LEN 128
+#define MT_BUF_WITH_OVERHEAD(x) \
+ ((x) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
struct mt76_dev;
struct mt76_wcid;
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 2bfc8214c0d8..5081643ce701 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -286,7 +286,7 @@ static int
mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
int nsgs, gfp_t gfp)
{
- int i;
+ int i, data_size = SKB_WITH_OVERHEAD(q->buf_size);
for (i = 0; i < nsgs; i++) {
struct page *page;
@@ -299,7 +299,7 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
page = virt_to_head_page(data);
offset = data - page_address(page);
- sg_set_page(&urb->sg[i], page, q->buf_size, offset);
+ sg_set_page(&urb->sg[i], page, data_size, offset);
}
if (i < nsgs) {
@@ -311,7 +311,7 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
}
urb->num_sgs = max_t(int, i, urb->num_sgs);
- urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
+ urb->transfer_buffer_length = urb->num_sgs * data_size;
sg_init_marker(urb->sg, urb->num_sgs);
return i ? : -ENOMEM;
@@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
if (!q->entry)
return -ENOMEM;
- q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
+ if (dev->usb.sg_en)
+ q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
+ else
+ q->buf_size = PAGE_SIZE;
q->ndesc = MT_NUM_RX_ENTRIES;
+
for (i = 0; i < q->ndesc; i++) {
err = mt76u_rx_urb_alloc(dev, &q->entry[i]);
if (err < 0)
--
2.21.0
On 2019-05-31 11:38, Lorenzo Bianconi wrote:
> Reallocate the skb if there is no enough space to manage the AMSDU rx packets.
> Do not always copy the first part of received frames if A-MSDU is enabled
> for SG capable devices
>
> Changes since v1:
> - do not allocate multiple page buffers but rely on fragmented skbs
> if there is no enough space to manage the AMSDU rx packets
>
> @Felix: do you prefer to take this series in your tree or is it better
> to merge it in wireless-drivers?
Going through wireless-drivers is fine with me.
Acked-by: Felix Fietkau <[email protected]>
- Felix
Hi and sorry for late comment.
On Fri, May 31, 2019 at 11:38:22AM +0200, Lorenzo Bianconi wrote:
> Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> receive frames up to q->buf_size but they will be discarded in
> mt76u_process_rx_entry since there is no enough room for
> skb_shared_info. Fix the issue reallocating the skb and copying in the
> linear area the first 128B of the received frames and in the frag_list
> the remaining part.
>
> Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> drivers/net/wireless/mediatek/mt76/usb.c | 52 +++++++++++++++++++----
> 2 files changed, 44 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 97a1296562d0..74d4edf941d6 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -30,6 +30,7 @@
> #define MT_TX_RING_SIZE 256
> #define MT_MCU_RING_SIZE 32
> #define MT_RX_BUF_SIZE 2048
> +#define MT_SKB_HEAD_LEN 128
>
> struct mt76_dev;
> struct mt76_wcid;
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index bbaa1365bbda..2bfc8214c0d8 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -429,6 +429,48 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
> return dma_len;
> }
>
> +static struct sk_buff *
> +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> + int *nsgs)
> +{
> + int data_len = min(len, MT_SKB_HEAD_LEN);
> + struct sk_buff *skb;
> +
> + if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> + /* fast path */
> + skb = build_skb(data, buf_size);
> + if (!skb)
> + return NULL;
> +
> + skb_reserve(skb, MT_DMA_HDR_LEN);
> + __skb_put(skb, len);
> +
> + return skb;
> + }
> +
> + /* slow path, not enough space for data and
> + * skb_shared_info
> + */
> + skb = alloc_skb(data_len, GFP_ATOMIC);
> + if (!skb)
> + return NULL;
> +
> + skb_put_data(skb, data + MT_DMA_HDR_LEN, data_len);
mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
of the buffer in fragment, which supose to be more efficient,
see comment in iwl_mvm_pass_packet_to_mac80211().
> + data += (data_len + MT_DMA_HDR_LEN);
> + len -= data_len;
> + if (len > 0) {
> + struct page *page = virt_to_head_page(data);
> + int offset = data - (u8 *)page_address(page);
u8 cast not needed.
> + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> + page, offset, len, buf_size);
> + } else {
> + *nsgs = 0;
This seems to be not necessary or a bug (use first buffer 2 times).
Stanislaw
On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> Set usb buffer size taking into account skb_shared_info in order to
> not always copy the first part of received frames if A-MSDU is enabled
> for SG capable devices
>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/mt76.h | 3 +++
> drivers/net/wireless/mediatek/mt76/usb.c | 12 ++++++++----
> 2 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 74d4edf941d6..7899e9b88b54 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -32,6 +32,9 @@
> #define MT_RX_BUF_SIZE 2048
> #define MT_SKB_HEAD_LEN 128
>
> +#define MT_BUF_WITH_OVERHEAD(x) \
> + ((x) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
> +
> struct mt76_dev;
> struct mt76_wcid;
>
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index 2bfc8214c0d8..5081643ce701 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -286,7 +286,7 @@ static int
> mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
> int nsgs, gfp_t gfp)
> {
> - int i;
> + int i, data_size = SKB_WITH_OVERHEAD(q->buf_size);
>
> for (i = 0; i < nsgs; i++) {
> struct page *page;
> @@ -299,7 +299,7 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
>
> page = virt_to_head_page(data);
> offset = data - page_address(page);
> - sg_set_page(&urb->sg[i], page, q->buf_size, offset);
> + sg_set_page(&urb->sg[i], page, data_size, offset);
> }
>
> if (i < nsgs) {
> @@ -311,7 +311,7 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
> }
>
> urb->num_sgs = max_t(int, i, urb->num_sgs);
> - urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> + urb->transfer_buffer_length = urb->num_sgs * data_size;
> sg_init_marker(urb->sg, urb->num_sgs);
>
> return i ? : -ENOMEM;
> @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> if (!q->entry)
> return -ENOMEM;
>
> - q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> + if (dev->usb.sg_en)
> + q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
I strongly recommend to not doing this. While this should work
in theory creating buffer with size of 2k + some bytes might
trigger various bugs in dma mapping or other low level code.
And skb_shered_info is needed only in first buffer IIUC.
Also this patch seems to make first patch unnecessary except for
non sg_en case (in which I think rx AMSDU is broken anyway),
so I would prefer just to apply first patch.
Stanislaw
> Hi and sorry for late comment.
>
Hi Stanislaw,
> On Fri, May 31, 2019 at 11:38:22AM +0200, Lorenzo Bianconi wrote:
> > Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> > for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> > receive frames up to q->buf_size but they will be discarded in
> > mt76u_process_rx_entry since there is no enough room for
> > skb_shared_info. Fix the issue reallocating the skb and copying in the
> > linear area the first 128B of the received frames and in the frag_list
> > the remaining part.
> >
> > Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> > drivers/net/wireless/mediatek/mt76/usb.c | 52 +++++++++++++++++++----
> > 2 files changed, 44 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > index 97a1296562d0..74d4edf941d6 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > @@ -30,6 +30,7 @@
> > #define MT_TX_RING_SIZE 256
> > #define MT_MCU_RING_SIZE 32
> > #define MT_RX_BUF_SIZE 2048
> > +#define MT_SKB_HEAD_LEN 128
> >
> > struct mt76_dev;
> > struct mt76_wcid;
> > diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> > index bbaa1365bbda..2bfc8214c0d8 100644
> > --- a/drivers/net/wireless/mediatek/mt76/usb.c
> > +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> > @@ -429,6 +429,48 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
> > return dma_len;
> > }
> >
> > +static struct sk_buff *
> > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > + int *nsgs)
> > +{
> > + int data_len = min(len, MT_SKB_HEAD_LEN);
> > + struct sk_buff *skb;
> > +
> > + if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> > + /* fast path */
> > + skb = build_skb(data, buf_size);
> > + if (!skb)
> > + return NULL;
> > +
> > + skb_reserve(skb, MT_DMA_HDR_LEN);
> > + __skb_put(skb, len);
> > +
> > + return skb;
> > + }
> > +
> > + /* slow path, not enough space for data and
> > + * skb_shared_info
> > + */
> > + skb = alloc_skb(data_len, GFP_ATOMIC);
> > + if (!skb)
> > + return NULL;
> > +
> > + skb_put_data(skb, data + MT_DMA_HDR_LEN, data_len);
> mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> of the buffer in fragment, which supose to be more efficient,
> see comment in iwl_mvm_pass_packet_to_mac80211().
Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
header in the linear area of the skb since otherwise the stack will need to
align them
>
> > + data += (data_len + MT_DMA_HDR_LEN);
> > + len -= data_len;
> > + if (len > 0) {
> > + struct page *page = virt_to_head_page(data);
> > + int offset = data - (u8 *)page_address(page);
> u8 cast not needed.
>
> > + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> > + page, offset, len, buf_size);
> > + } else {
> > + *nsgs = 0;
> This seems to be not necessary or a bug (use first buffer 2 times).
maybe I have been a little bit too paranoid here :)
Regards,
Lorenzo
>
> Stanislaw
> On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
[...]
> > }
> >
> > urb->num_sgs = max_t(int, i, urb->num_sgs);
> > - urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > + urb->transfer_buffer_length = urb->num_sgs * data_size;
> > sg_init_marker(urb->sg, urb->num_sgs);
> >
> > return i ? : -ENOMEM;
> > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > if (!q->entry)
> > return -ENOMEM;
> >
> > - q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > + if (dev->usb.sg_en)
> > + q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
>
> I strongly recommend to not doing this. While this should work
> in theory creating buffer with size of 2k + some bytes might
> trigger various bugs in dma mapping or other low level code.
even in practice actually :) but we can be more cautious since probably copying
the first 128B will not make any difference
>
> And skb_shered_info is needed only in first buffer IIUC.
>
> Also this patch seems to make first patch unnecessary except for
> non sg_en case (in which I think rx AMSDU is broken anyway),
> so I would prefer just to apply first patch.
I do not think rx AMSDU is broken for non sg_en case since the max rx value
allowed should be 3839 IIRC and we alloc one page in this case
Regards,
Lorenzo
>
> Stanislaw
>
On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > + int *nsgs)
> > > +{
> > > + int data_len = min(len, MT_SKB_HEAD_LEN);
Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
we will go through fast path.
> > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > of the buffer in fragment, which supose to be more efficient,
> > see comment in iwl_mvm_pass_packet_to_mac80211().
>
> Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> header in the linear area of the skb since otherwise the stack will need to
> align them
Not sure if understand, I think aliment of L3 & L4 headers will be
the same, assuming ieee80211 header is aligned the same in fragment
buffer and in linear area. But if you think this is better to copy those
to linear area I'm ok with that.
Stanislaw
On Jun 12, Stanislaw Gruszka wrote:
> On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > > + int *nsgs)
> > > > +{
> > > > + int data_len = min(len, MT_SKB_HEAD_LEN);
>
> Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
> we will go through fast path.
I guess if we remove data_len = min(len, MT_SKB_HEAD_LEN) and even *nsgs = 0 at
the end we are making some assumptions on the value of MT_SKB_HEAD_LEN and
buf_size. In the patch I just avoided them but maybe we can just assume that
MT_SKB_HEAD_LEN and buf_size will not changed in the future. What do you
think?
>
> > > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > > of the buffer in fragment, which supose to be more efficient,
> > > see comment in iwl_mvm_pass_packet_to_mac80211().
> >
> > Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> > header in the linear area of the skb since otherwise the stack will need to
> > align them
>
> Not sure if understand, I think aliment of L3 & L4 headers will be
> the same, assuming ieee80211 header is aligned the same in fragment
> buffer and in linear area. But if you think this is better to copy those
> to linear area I'm ok with that.
Sorry I have not been so clear. I mean in the stack before accessing a given
header we will run pskb_may_pull() that can end up copying the skb if there is
not enough space in the skb->head
Regards,
Lorenzo
>
> Stanislaw
On Wed, Jun 12, 2019 at 11:53:03AM +0200, Lorenzo Bianconi wrote:
> > On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
>
> [...]
>
> > > }
> > >
> > > urb->num_sgs = max_t(int, i, urb->num_sgs);
> > > - urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > > + urb->transfer_buffer_length = urb->num_sgs * data_size;
> > > sg_init_marker(urb->sg, urb->num_sgs);
> > >
> > > return i ? : -ENOMEM;
> > > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > > if (!q->entry)
> > > return -ENOMEM;
> > >
> > > - q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > > + if (dev->usb.sg_en)
> > > + q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> >
> > I strongly recommend to not doing this. While this should work
> > in theory creating buffer with size of 2k + some bytes might
> > trigger various bugs in dma mapping or other low level code.
>
> even in practice actually :)
I wouldn't be sure about this. It's not common to have buffers of
such size and crossing pages boundaries. It really can trigger
nasty bugs on various IOMMU drivers.
> but we can be more cautious since probably copying
> the first 128B will not make any difference
Not sure if I understand what you mean.
> > And skb_shered_info is needed only in first buffer IIUC.
> >
> > Also this patch seems to make first patch unnecessary except for
> > non sg_en case (in which I think rx AMSDU is broken anyway),
> > so I would prefer just to apply first patch.
>
> I do not think rx AMSDU is broken for non sg_en case since the max rx value
> allowed should be 3839 IIRC and we alloc one page in this case
If that's the case we should be fine, but then I do not understand
why we allocate 8*2k buffers for sg_en case, isn't that AP can
sent AMSDU frame 16k big?
Stanislaw
> On Wed, Jun 12, 2019 at 11:53:03AM +0200, Lorenzo Bianconi wrote:
> > > On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> >
> > [...]
> >
> > > > }
> > > >
> > > > urb->num_sgs = max_t(int, i, urb->num_sgs);
> > > > - urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > > > + urb->transfer_buffer_length = urb->num_sgs * data_size;
> > > > sg_init_marker(urb->sg, urb->num_sgs);
> > > >
> > > > return i ? : -ENOMEM;
> > > > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > > > if (!q->entry)
> > > > return -ENOMEM;
> > > >
> > > > - q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > > > + if (dev->usb.sg_en)
> > > > + q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> > >
> > > I strongly recommend to not doing this. While this should work
> > > in theory creating buffer with size of 2k + some bytes might
> > > trigger various bugs in dma mapping or other low level code.
> >
> > even in practice actually :)
>
> I wouldn't be sure about this. It's not common to have buffers of
> such size and crossing pages boundaries. It really can trigger
> nasty bugs on various IOMMU drivers.
I was just joking, I mean that it worked in the tests I carried out, but I
agree it can trigger some issues in buggy IOMMU drivers
>
> > but we can be more cautious since probably copying
> > the first 128B will not make any difference
>
> Not sure if I understand what you mean.
Please correct me if I am wrong but I think max amsdu rx size is 3839B for
mt76. For the sg_en case this frame will span over multiple sg buffers since
sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
skb_shared_info when configuring the sg buffer size we will need to always copy
the first 128B of the first buffer since received len will be set to 2048 and
the following if condition will always fail:
if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
}
>
> > > And skb_shered_info is needed only in first buffer IIUC.
> > >
> > > Also this patch seems to make first patch unnecessary except for
> > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > so I would prefer just to apply first patch.
> >
> > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > allowed should be 3839 IIRC and we alloc one page in this case
>
> If that's the case we should be fine, but then I do not understand
> why we allocate 8*2k buffers for sg_en case, isn't that AP can
> sent AMSDU frame 16k big?
Sorry I did not get what you mean here, could you please explain?
Regards,
Lorenzo
>
> Stanislaw
>
On Wed, Jun 12, 2019 at 12:21:34PM +0200, Lorenzo Bianconi wrote:
> On Jun 12, Stanislaw Gruszka wrote:
> > On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > > > + int *nsgs)
> > > > > +{
> > > > > + int data_len = min(len, MT_SKB_HEAD_LEN);
> >
> > Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
> > we will go through fast path.
>
> I guess if we remove data_len = min(len, MT_SKB_HEAD_LEN) and even *nsgs = 0 at
> the end we are making some assumptions on the value of MT_SKB_HEAD_LEN and
> buf_size. In the patch I just avoided them but maybe we can just assume that
> MT_SKB_HEAD_LEN and buf_size will not changed in the future. What do you
> think?
Yes, sure. Other drivers just use 128 value directly and don't even
create a macro for that. And if somebody will decide to change
buf_size it will not be small value.
> > > > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > > > of the buffer in fragment, which supose to be more efficient,
> > > > see comment in iwl_mvm_pass_packet_to_mac80211().
> > >
> > > Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> > > header in the linear area of the skb since otherwise the stack will need to
> > > align them
> >
> > Not sure if understand, I think aliment of L3 & L4 headers will be
> > the same, assuming ieee80211 header is aligned the same in fragment
> > buffer and in linear area. But if you think this is better to copy those
> > to linear area I'm ok with that.
>
> Sorry I have not been so clear. I mean in the stack before accessing a given
> header we will run pskb_may_pull() that can end up copying the skb if there is
> not enough space in the skb->head
Ok, so L3 and L4 headers should be in linear area of skb and if not
network stack will copy them from fragment. But I wonder why other
drivers just copy ieee80211_hdr and SNAP ? Isn't that if we copy
128B then is possible that part of the payload will be in linear
area and part in fragment, whereas is expected that payload
will not be broken into two parts?
Stanislaw
> On Wed, Jun 12, 2019 at 12:21:34PM +0200, Lorenzo Bianconi wrote:
> > On Jun 12, Stanislaw Gruszka wrote:
> > > On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > > > > + int *nsgs)
> > > > > > +{
> > > > > > + int data_len = min(len, MT_SKB_HEAD_LEN);
> > >
> > > Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
> > > we will go through fast path.
> >
> > I guess if we remove data_len = min(len, MT_SKB_HEAD_LEN) and even *nsgs = 0 at
> > the end we are making some assumptions on the value of MT_SKB_HEAD_LEN and
> > buf_size. In the patch I just avoided them but maybe we can just assume that
> > MT_SKB_HEAD_LEN and buf_size will not changed in the future. What do you
> > think?
>
> Yes, sure. Other drivers just use 128 value directly and don't even
> create a macro for that. And if somebody will decide to change
> buf_size it will not be small value.
Ok, I will simplify it in v2
>
> > > > > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > > > > of the buffer in fragment, which supose to be more efficient,
> > > > > see comment in iwl_mvm_pass_packet_to_mac80211().
> > > >
> > > > Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> > > > header in the linear area of the skb since otherwise the stack will need to
> > > > align them
> > >
> > > Not sure if understand, I think aliment of L3 & L4 headers will be
> > > the same, assuming ieee80211 header is aligned the same in fragment
> > > buffer and in linear area. But if you think this is better to copy those
> > > to linear area I'm ok with that.
> >
> > Sorry I have not been so clear. I mean in the stack before accessing a given
> > header we will run pskb_may_pull() that can end up copying the skb if there is
> > not enough space in the skb->head
>
> Ok, so L3 and L4 headers should be in linear area of skb and if not
> network stack will copy them from fragment. But I wonder why other
> drivers just copy ieee80211_hdr and SNAP ? Isn't that if we copy
> 128B then is possible that part of the payload will be in linear
> area and part in fragment, whereas is expected that payload
> will not be broken into two parts?
I think the payload can be split in multiple skb fragments, the constraint is
just related to header parsing
Regards,
Lorenzo
>
> Stanislaw
On Wed, Jun 12, 2019 at 12:49:22PM +0200, Lorenzo Bianconi wrote:
> > On Wed, Jun 12, 2019 at 11:53:03AM +0200, Lorenzo Bianconi wrote:
> > > > On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> > >
> > > [...]
> > >
> > > > > }
> > > > >
> > > > > urb->num_sgs = max_t(int, i, urb->num_sgs);
> > > > > - urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > > > > + urb->transfer_buffer_length = urb->num_sgs * data_size;
> > > > > sg_init_marker(urb->sg, urb->num_sgs);
> > > > >
> > > > > return i ? : -ENOMEM;
> > > > > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > > > > if (!q->entry)
> > > > > return -ENOMEM;
> > > > >
> > > > > - q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > > > > + if (dev->usb.sg_en)
> > > > > + q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> > > >
> > > > I strongly recommend to not doing this. While this should work
> > > > in theory creating buffer with size of 2k + some bytes might
> > > > trigger various bugs in dma mapping or other low level code.
> > >
> > > even in practice actually :)
> >
> > I wouldn't be sure about this. It's not common to have buffers of
> > such size and crossing pages boundaries. It really can trigger
> > nasty bugs on various IOMMU drivers.
>
> I was just joking, I mean that it worked in the tests I carried out, but I
> agree it can trigger some issues in buggy IOMMU drivers
My sense of humor declined quite drastically lastly ;-/
> > > but we can be more cautious since probably copying
> > > the first 128B will not make any difference
> >
> > Not sure if I understand what you mean.
>
> Please correct me if I am wrong but I think max amsdu rx size is 3839B for
> mt76. For the sg_en case this frame will span over multiple sg buffers since
> sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
> skb_shared_info when configuring the sg buffer size we will need to always copy
> the first 128B of the first buffer since received len will be set to 2048 and
> the following if condition will always fail:
>
> if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> }
Ok, that I understand.
> > > > And skb_shered_info is needed only in first buffer IIUC.
> > > >
> > > > Also this patch seems to make first patch unnecessary except for
> > > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > > so I would prefer just to apply first patch.
> > >
> > > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > > allowed should be 3839 IIRC and we alloc one page in this case
> >
> > If that's the case we should be fine, but then I do not understand
> > why we allocate 8*2k buffers for sg_en case, isn't that AP can
> > sent AMSDU frame 16k big?
>
> Sorry I did not get what you mean here, could you please explain?
If max RX AMSDU size is 3839B I do not see reason why we allocate
MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
I thought the reason is that max AMSDU size is 16kB so it fit into
8 sg buffers of 2k.
In other words, for me, looks like either
- we can not handle AMSDU for non sg case because we do not
allocate big enough buffer
or
- we can just use one PAGE_SIZE buffer for rx and remove sg
buffers for rx completely
Do I miss something ?
Stanislaw
[...]
>
> My sense of humor declined quite drastically lastly ;-/
>
> > > > but we can be more cautious since probably copying
> > > > the first 128B will not make any difference
> > >
> > > Not sure if I understand what you mean.
> >
> > Please correct me if I am wrong but I think max amsdu rx size is 3839B for
> > mt76. For the sg_en case this frame will span over multiple sg buffers since
> > sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
> > skb_shared_info when configuring the sg buffer size we will need to always copy
> > the first 128B of the first buffer since received len will be set to 2048 and
> > the following if condition will always fail:
> >
> > if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> > }
>
> Ok, that I understand.
>
> > > > > And skb_shered_info is needed only in first buffer IIUC.
> > > > >
> > > > > Also this patch seems to make first patch unnecessary except for
> > > > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > > > so I would prefer just to apply first patch.
> > > >
> > > > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > > > allowed should be 3839 IIRC and we alloc one page in this case
> > >
> > > If that's the case we should be fine, but then I do not understand
> > > why we allocate 8*2k buffers for sg_en case, isn't that AP can
> > > sent AMSDU frame 16k big?
> >
> > Sorry I did not get what you mean here, could you please explain?
>
> If max RX AMSDU size is 3839B I do not see reason why we allocate
> MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> I thought the reason is that max AMSDU size is 16kB so it fit into
> 8 sg buffers of 2k.
>
> In other words, for me, looks like either
> - we can not handle AMSDU for non sg case because we do not
> allocate big enough buffer
I think AMSDU is mandatory and we currently support it even for non-sg case
(since max rx AMSDU is 3839B)
> or
> - we can just use one PAGE_SIZE buffer for rx and remove sg
> buffers for rx completely
using sg buffers we can support bigger rx AMSDU size in the future without using
huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
mt76x2u)
Regards,
Lorenzo
>
> Do I miss something ?
>
> Stanislaw
On Wed, Jun 12, 2019 at 02:28:48PM +0200, Lorenzo Bianconi wrote:
> [...]
> >
> > My sense of humor declined quite drastically lastly ;-/
> >
> > > > > but we can be more cautious since probably copying
> > > > > the first 128B will not make any difference
> > > >
> > > > Not sure if I understand what you mean.
> > >
> > > Please correct me if I am wrong but I think max amsdu rx size is 3839B for
> > > mt76. For the sg_en case this frame will span over multiple sg buffers since
> > > sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
> > > skb_shared_info when configuring the sg buffer size we will need to always copy
> > > the first 128B of the first buffer since received len will be set to 2048 and
> > > the following if condition will always fail:
> > >
> > > if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> > > }
> >
> > Ok, that I understand.
> >
> > > > > > And skb_shered_info is needed only in first buffer IIUC.
> > > > > >
> > > > > > Also this patch seems to make first patch unnecessary except for
> > > > > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > > > > so I would prefer just to apply first patch.
> > > > >
> > > > > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > > > > allowed should be 3839 IIRC and we alloc one page in this case
> > > >
> > > > If that's the case we should be fine, but then I do not understand
> > > > why we allocate 8*2k buffers for sg_en case, isn't that AP can
> > > > sent AMSDU frame 16k big?
> > >
> > > Sorry I did not get what you mean here, could you please explain?
> >
> > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > I thought the reason is that max AMSDU size is 16kB so it fit into
> > 8 sg buffers of 2k.
> >
> > In other words, for me, looks like either
> > - we can not handle AMSDU for non sg case because we do not
> > allocate big enough buffer
>
> I think AMSDU is mandatory and we currently support it even for non-sg case
> (since max rx AMSDU is 3839B)
>
> > or
> > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > buffers for rx completely
>
> using sg buffers we can support bigger rx AMSDU size in the future without using
> huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> mt76x2u)
I think it would be simpler just to allocate 2 pages for 7935B .
Stanislaw
On Wed, Jun 12, 2019 at 02:59:05PM +0200, Stanislaw Gruszka wrote:
> > > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > > I thought the reason is that max AMSDU size is 16kB so it fit into
> > > 8 sg buffers of 2k.
> > >
> > > In other words, for me, looks like either
> > > - we can not handle AMSDU for non sg case because we do not
> > > allocate big enough buffer
> >
> > I think AMSDU is mandatory and we currently support it even for non-sg case
> > (since max rx AMSDU is 3839B)
> >
> > > or
> > > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > > buffers for rx completely
> >
> > using sg buffers we can support bigger rx AMSDU size in the future without using
> > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > mt76x2u)
>
> I think it would be simpler just to allocate 2 pages for 7935B .
And if we could determine that there is no true need to use sg for rx,
I think best approach here would be revert f8f527b16db5 in v5.2 to fix
regression and remove rx sg in -next. That would make code simpler,
allocate 4k instead 16k per packet, allow to use build_skb (4096 - 3839
give enough space for shared info) and not use usb hcd bounce buffer.
Stanislaw
> On Wed, Jun 12, 2019 at 02:28:48PM +0200, Lorenzo Bianconi wrote:
[...]
> >
> > using sg buffers we can support bigger rx AMSDU size in the future without using
> > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > mt76x2u)
>
> I think it would be simpler just to allocate 2 pages for 7935B .
>
We should avoid increasing buffer size to more than PAGE_SIZE for
performance reasons. As suggested by Felix what about of setting buf_size to
PAGE_SIZE for both sg and non-sg cases and for sg setting usb data size to
SKB_WITH_OVERHEAD(q->buf_size) & (usb_endpoint_maxp() - 1);
Regards,
Lorenzo
> Stanislaw
On Jun 12, Stanislaw Gruszka wrote:
> On Wed, Jun 12, 2019 at 02:59:05PM +0200, Stanislaw Gruszka wrote:
> > > > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > > > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > > > I thought the reason is that max AMSDU size is 16kB so it fit into
> > > > 8 sg buffers of 2k.
> > > >
> > > > In other words, for me, looks like either
> > > > - we can not handle AMSDU for non sg case because we do not
> > > > allocate big enough buffer
> > >
> > > I think AMSDU is mandatory and we currently support it even for non-sg case
> > > (since max rx AMSDU is 3839B)
> > >
> > > > or
> > > > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > > > buffers for rx completely
> > >
> > > using sg buffers we can support bigger rx AMSDU size in the future without using
> > > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > > mt76x2u)
> >
> > I think it would be simpler just to allocate 2 pages for 7935B .
>
> And if we could determine that there is no true need to use sg for rx,
> I think best approach here would be revert f8f527b16db5 in v5.2 to fix
> regression and remove rx sg in -next. That would make code simpler,
> allocate 4k instead 16k per packet, allow to use build_skb (4096 - 3839
> give enough space for shared info) and not use usb hcd bounce buffer.
I do not think we should drop sg support since:
- it allow us to rx huge amsdu frames (e.g. IEEE80211_MAX_MPDU_LEN_VHT_11454)
using multiple one page buffer. I think there will be new usb devices where we can
increase amsdu size (we can increase it even on mt76x2u usb 3.0 devices)
- without SG we can't use build_skb() without copying a given size of the packet since the
space needed for skb_shared_info is 320B on a x86_64 device
- the fix for f8f527b16db5 has been already tested
Regards,
Lorenzo
>
> Stanislaw
Hi
On Thu, Jun 13, 2019 at 10:26:38AM +0200, Lorenzo Bianconi wrote:
> [...]
>
> > I looked at intel wifi drivers and this is handled by amsdu_size module
> > parameter, supported values are 4k, 8k and 12k. RX allocation size and
> > proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
> > mt76 HW and FW can handle bigger AMSDUs I think we should do similar
> > thing.
> >
> > Otherwise looks for me, we just waste memory and have not needed code
> > for no true reason.
> >
> > > space needed for skb_shared_info is 320B on a x86_64 device
> >
> > Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
> > could not used and 128B copy fallback will be necessary.
>
> Hi Stanislaw,
>
> reviewing the original patch I think we can't trigger any IOMMU bug since the
> usb buffer length is actually 2048 and not 2048 + skb_shared_info_size:
I'm concerned about alignment and crossing pages boundaries. If you
allocate via page_frag_alloc() buffers, except first one, will have
'not natural' alignment and every second will be spanned across
two pages.
Stanislaw
> Hi
>
> On Thu, Jun 13, 2019 at 10:26:38AM +0200, Lorenzo Bianconi wrote:
> > [...]
> >
> > > I looked at intel wifi drivers and this is handled by amsdu_size module
> > > parameter, supported values are 4k, 8k and 12k. RX allocation size and
> > > proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
> > > mt76 HW and FW can handle bigger AMSDUs I think we should do similar
> > > thing.
> > >
> > > Otherwise looks for me, we just waste memory and have not needed code
> > > for no true reason.
> > >
> > > > space needed for skb_shared_info is 320B on a x86_64 device
> > >
> > > Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
> > > could not used and 128B copy fallback will be necessary.
> >
> > Hi Stanislaw,
> >
> > reviewing the original patch I think we can't trigger any IOMMU bug since the
> > usb buffer length is actually 2048 and not 2048 + skb_shared_info_size:
>
> I'm concerned about alignment and crossing pages boundaries. If you
> allocate via page_frag_alloc() buffers, except first one, will have
> 'not natural' alignment and every second will be spanned across
> two pages.
ack, so I think the second approach will be safer (using roundup instead of
rounddown :))
Regards,
Lorenzo
>
> Stanislaw
>
[...]
> I looked at intel wifi drivers and this is handled by amsdu_size module
> parameter, supported values are 4k, 8k and 12k. RX allocation size and
> proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
> mt76 HW and FW can handle bigger AMSDUs I think we should do similar
> thing.
>
> Otherwise looks for me, we just waste memory and have not needed code
> for no true reason.
>
> > space needed for skb_shared_info is 320B on a x86_64 device
>
> Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
> could not used and 128B copy fallback will be necessary.
Hi Stanislaw,
reviewing the original patch I think we can't trigger any IOMMU bug since the
usb buffer length is actually 2048 and not 2048 + skb_shared_info_size:
in mt76u_fill_rx_sg()
data_size = SKB_WITH_OVERHEAD(q->buf_size);
q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
where MT_BUF_WITH_OVERHEAD is
((x) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
anyway we can even set
q->buf_size = PAGE_SIZE
data_size = rounddown(SKB_WITH_OVERHEAD(q->buf_size), usb_endpoint_maxp())
Regards,
Lorenzo
>
>
> Stanislaw
On Wed, Jun 12, 2019 at 04:27:42PM +0200, Lorenzo Bianconi wrote:
> > On Wed, Jun 12, 2019 at 02:28:48PM +0200, Lorenzo Bianconi wrote:
>
> [...]
>
> > >
> > > using sg buffers we can support bigger rx AMSDU size in the future without using
> > > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > > mt76x2u)
> > I think it would be simpler just to allocate 2 pages for 7935B .
> >
>
> We should avoid increasing buffer size to more than PAGE_SIZE for
> performance reasons.
Interesting, at what place and how this affect performance negatively ?
> As suggested by Felix what about of setting buf_size to
> PAGE_SIZE for both sg and non-sg cases and for sg setting usb data size to
>
> SKB_WITH_OVERHEAD(q->buf_size) & (usb_endpoint_maxp() - 1);
Increasing to PAGE_SIZE for sg looks reasonable to me.
Not sure if understand data_size logic. If this mean 'PAGE_SIZE - usb_endpint_maxp()',
looks ok to me as well (for first segment), but would require one extra segment
to avoid coping (i.e. 2 pages for 3839 , 3 pages for 7935 ...) If we would
like to stay with 128B copy fallback, we can have 1 page for 3839 , 2 for 7935 ...
It would be interesting how frequently AMSDU of max size is sent by
remote station. If this is rare situation I would be opting for
smaller allocation and 128B copy fallback. If this is frequent
for bigger allocation to assure we can always use build_skb().
Stanislaw
On Wed, Jun 12, 2019 at 04:44:01PM +0200, Lorenzo Bianconi wrote:
> On Jun 12, Stanislaw Gruszka wrote:
> > On Wed, Jun 12, 2019 at 02:59:05PM +0200, Stanislaw Gruszka wrote:
> > > > > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > > > > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > > > > I thought the reason is that max AMSDU size is 16kB so it fit into
> > > > > 8 sg buffers of 2k.
> > > > >
> > > > > In other words, for me, looks like either
> > > > > - we can not handle AMSDU for non sg case because we do not
> > > > > allocate big enough buffer
> > > >
> > > > I think AMSDU is mandatory and we currently support it even for non-sg case
> > > > (since max rx AMSDU is 3839B)
> > > >
> > > > > or
> > > > > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > > > > buffers for rx completely
> > > >
> > > > using sg buffers we can support bigger rx AMSDU size in the future without using
> > > > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > > > mt76x2u)
> > >
> > > I think it would be simpler just to allocate 2 pages for 7935B .
> >
> > And if we could determine that there is no true need to use sg for rx,
> > I think best approach here would be revert f8f527b16db5 in v5.2 to fix
> > regression and remove rx sg in -next. That would make code simpler,
> > allocate 4k instead 16k per packet, allow to use build_skb (4096 - 3839
> > give enough space for shared info) and not use usb hcd bounce buffer.
>
> I do not think we should drop sg support since:
> - it allow us to rx huge amsdu frames (e.g. IEEE80211_MAX_MPDU_LEN_VHT_11454)
> using multiple one page buffer. I think there will be new usb devices where we can
> increase amsdu size (we can increase it even on mt76x2u usb 3.0 devices)
I looked at intel wifi drivers and this is handled by amsdu_size module
parameter, supported values are 4k, 8k and 12k. RX allocation size and
proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
mt76 HW and FW can handle bigger AMSDUs I think we should do similar
thing.
Otherwise looks for me, we just waste memory and have not needed code
for no true reason.
> space needed for skb_shared_info is 320B on a x86_64 device
Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
could not used and 128B copy fallback will be necessary.
Stanislaw