2016-03-02 12:45:27

by Hans Westgaard Ry

[permalink] [raw]
Subject: [PATCH v2] IB/ipoib: Add handling for sending of skb with many frags

IPoIB converts skb-fragments to sge adding 1 extra sge when SG is enabled.
Current codepath assumes that the max number of sge a device supports
is at least MAX_SKB_FRAGS+1; there is no interaction with upper layers
to limit the number of fragments in an skb if a device supports fewer
sges. The assumptions also lead to requesting a fixed number of sge
when IPoIB creates queue-pairs with SG enabled.

A fallback/slowpath is implemented using skb_linearize to
handle cases where the conversion would result in more sges than supported.

Signed-off-by: Hans Westgaard Ry <[email protected]>
Reviewed-by: Håkon Bugge <[email protected]>
Reviewed-by: Wei Lin Guay <[email protected]>
---
drivers/infiniband/ulp/ipoib/ipoib.h | 2 ++
drivers/infiniband/ulp/ipoib/ipoib_cm.c | 23 +++++++++++++++++++++--
drivers/infiniband/ulp/ipoib/ipoib_ib.c | 18 ++++++++++++++++++
drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 5 ++++-
4 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a6f3eab..85be0de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -244,6 +244,7 @@ struct ipoib_cm_tx {
unsigned tx_tail;
unsigned long flags;
u32 mtu;
+ unsigned max_send_sge;
};

struct ipoib_cm_rx_buf {
@@ -390,6 +391,7 @@ struct ipoib_dev_priv {
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
+ unsigned max_send_sge;
};

struct ipoib_ah {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 917e46e..c8ed535 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
+ unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);

if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
return;
}
-
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ if (skb_linearize(skb) < 0) {
+ ipoib_warn(priv, "skb could not be linearized\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ /* Does skb_linearize return ok without reducing nr_frags? */
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ ipoib_warn(priv, "too many frags after skb linearize\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ }
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num);

@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp;

if (dev->features & NETIF_F_SG)
- attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+ attr.cap.max_send_sge =
+ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);

tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) {
@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr);
}
+ tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp;
}

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5ea0c14..ee7a555 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -540,6 +540,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
+ unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);

if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -563,6 +564,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
phead = NULL;
hlen = 0;
}
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ if (skb_linearize(skb) < 0) {
+ ipoib_warn(priv, "skb could not be linearized\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ /* Does skb_linearize return ok without reducing nr_frags? */
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ ipoib_warn(priv, "too many frags after skb linearize\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ }

ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index d48c5ba..b809c37 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;

if (dev->features & NETIF_F_SG)
- init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+ init_attr.cap.max_send_sge =
+ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);

priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;

+ priv->max_send_sge = init_attr.cap.max_send_sge;
+
return 0;

out_free_send_cq:
--
2.4.3


2016-03-03 15:11:13

by Doug Ledford

[permalink] [raw]
Subject: Re: [PATCH v2] IB/ipoib: Add handling for sending of skb with many frags

On 03/02/2016 07:44 AM, Hans Westgaard Ry wrote:
> IPoIB converts skb-fragments to sge adding 1 extra sge when SG is enabled.
> Current codepath assumes that the max number of sge a device supports
> is at least MAX_SKB_FRAGS+1; there is no interaction with upper layers
> to limit the number of fragments in an skb if a device supports fewer
> sges. The assumptions also lead to requesting a fixed number of sge
> when IPoIB creates queue-pairs with SG enabled.
>
> A fallback/slowpath is implemented using skb_linearize to
> handle cases where the conversion would result in more sges than supported.
>
> Signed-off-by: Hans Westgaard Ry <[email protected]>
> Reviewed-by: Håkon Bugge <[email protected]>
> Reviewed-by: Wei Lin Guay <[email protected]>

Thanks for the version 2 that handles both connected and disconnected
mode. Applied.

> ---
> drivers/infiniband/ulp/ipoib/ipoib.h | 2 ++
> drivers/infiniband/ulp/ipoib/ipoib_cm.c | 23 +++++++++++++++++++++--
> drivers/infiniband/ulp/ipoib/ipoib_ib.c | 18 ++++++++++++++++++
> drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 5 ++++-
> 4 files changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index a6f3eab..85be0de 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -244,6 +244,7 @@ struct ipoib_cm_tx {
> unsigned tx_tail;
> unsigned long flags;
> u32 mtu;
> + unsigned max_send_sge;
> };
>
> struct ipoib_cm_rx_buf {
> @@ -390,6 +391,7 @@ struct ipoib_dev_priv {
> int hca_caps;
> struct ipoib_ethtool_st ethtool;
> struct timer_list poll_timer;
> + unsigned max_send_sge;
> };
>
> struct ipoib_ah {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> index 917e46e..c8ed535 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> @@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> struct ipoib_tx_buf *tx_req;
> int rc;
> + unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
>
> if (unlikely(skb->len > tx->mtu)) {
> ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
> @@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
> ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
> return;
> }
> -
> + if (skb_shinfo(skb)->nr_frags > usable_sge) {
> + if (skb_linearize(skb) < 0) {
> + ipoib_warn(priv, "skb could not be linearized\n");
> + ++dev->stats.tx_dropped;
> + ++dev->stats.tx_errors;
> + dev_kfree_skb_any(skb);
> + return;
> + }
> + /* Does skb_linearize return ok without reducing nr_frags? */
> + if (skb_shinfo(skb)->nr_frags > usable_sge) {
> + ipoib_warn(priv, "too many frags after skb linearize\n");
> + ++dev->stats.tx_dropped;
> + ++dev->stats.tx_errors;
> + dev_kfree_skb_any(skb);
> + return;
> + }
> + }
> ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
> tx->tx_head, skb->len, tx->qp->qp_num);
>
> @@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
> struct ib_qp *tx_qp;
>
> if (dev->features & NETIF_F_SG)
> - attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
> + attr.cap.max_send_sge =
> + min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
>
> tx_qp = ib_create_qp(priv->pd, &attr);
> if (PTR_ERR(tx_qp) == -EINVAL) {
> @@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
> attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
> tx_qp = ib_create_qp(priv->pd, &attr);
> }
> + tx->max_send_sge = attr.cap.max_send_sge;
> return tx_qp;
> }
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> index 5ea0c14..ee7a555 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> @@ -540,6 +540,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
> struct ipoib_tx_buf *tx_req;
> int hlen, rc;
> void *phead;
> + unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
>
> if (skb_is_gso(skb)) {
> hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
> @@ -563,6 +564,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
> phead = NULL;
> hlen = 0;
> }
> + if (skb_shinfo(skb)->nr_frags > usable_sge) {
> + if (skb_linearize(skb) < 0) {
> + ipoib_warn(priv, "skb could not be linearized\n");
> + ++dev->stats.tx_dropped;
> + ++dev->stats.tx_errors;
> + dev_kfree_skb_any(skb);
> + return;
> + }
> + /* Does skb_linearize return ok without reducing nr_frags? */
> + if (skb_shinfo(skb)->nr_frags > usable_sge) {
> + ipoib_warn(priv, "too many frags after skb linearize\n");
> + ++dev->stats.tx_dropped;
> + ++dev->stats.tx_errors;
> + dev_kfree_skb_any(skb);
> + return;
> + }
> + }
>
> ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
> skb->len, address, qpn);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> index d48c5ba..b809c37 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> @@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
> init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
>
> if (dev->features & NETIF_F_SG)
> - init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
> + init_attr.cap.max_send_sge =
> + min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
>
> priv->qp = ib_create_qp(priv->pd, &init_attr);
> if (IS_ERR(priv->qp)) {
> @@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
> priv->rx_wr.next = NULL;
> priv->rx_wr.sg_list = priv->rx_sge;
>
> + priv->max_send_sge = init_attr.cap.max_send_sge;
> +
> return 0;
>
> out_free_send_cq:
>


--
Doug Ledford <[email protected]>
GPG KeyID: 0E572FDD



Attachments:
signature.asc (884.00 B)
OpenPGP digital signature