2017-07-26 14:53:48

by Marcel Apfelbaum

[permalink] [raw]
Subject: [PATCH] drivers/rxe: improve rxe loopback

Currently a packet is marked for loopback only if the source and
destination address match. This is not enough when multiple
gids are present in rxe's gid table and the traffic is
from one gid to another.

Fix it by marking the packet for loopback if the destination
address appears in rxe's gid table.

Signed-off-by: Marcel Apfelbaum <[email protected]>
---
drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index c3a140e..b76a9a3 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}

+static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
+{
+ struct in_device *in_dev;
+ bool same_rxe = false;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(rxe->ndev);
+ if (!in_dev)
+ goto out;
+
+ for_ifa(in_dev)
+ if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
+ same_rxe = true;
+ goto out;
+ }
+ endfor_ifa(in_dev);
+out:
+ rcu_read_unlock();
+ return same_rxe;
+}
+
static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
@@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EHOSTUNREACH;
}

- if (!memcmp(saddr, daddr, sizeof(*daddr)))
+ if (addr4_same_rxe(rxe, daddr))
pkt->mask |= RXE_LOOPBACK_MASK;

prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
@@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}

+static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
+{
+ struct inet6_dev *in6_dev;
+ struct inet6_ifaddr *ifp;
+ bool same_rxe = false;
+
+ in6_dev = in6_dev_get(rxe->ndev);
+ if (!in6_dev)
+ return false;
+
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
+ if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
+ same_rxe = true;
+ goto out;
+ }
+out:
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ return same_rxe;
+}
+
static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
@@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EHOSTUNREACH;
}

- if (!memcmp(saddr, daddr, sizeof(*daddr)))
+ if (addr6_same_rxe(rxe, daddr))
pkt->mask |= RXE_LOOPBACK_MASK;

prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
--
2.9.4


2017-07-26 19:36:56

by Yuval Shaia

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> Currently a packet is marked for loopback only if the source and
> destination address match. This is not enough when multiple
> gids are present in rxe's gid table and the traffic is
> from one gid to another.
>
> Fix it by marking the packet for loopback if the destination
> address appears in rxe's gid table.
>
> Signed-off-by: Marcel Apfelbaum <[email protected]>
> ---
> drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index c3a140e..b76a9a3 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
> ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
> }
>
> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> +{
> + struct in_device *in_dev;
> + bool same_rxe = false;
> +
> + rcu_read_lock();
> + in_dev = __in_dev_get_rcu(rxe->ndev);
> + if (!in_dev)
> + goto out;
> +
> + for_ifa(in_dev)
> + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> + same_rxe = true;
> + goto out;
> + }
> + endfor_ifa(in_dev);

The above endfor_ifa should move to below.

> +out:
> + rcu_read_unlock();
> + return same_rxe;
> +}
> +
> static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> struct sk_buff *skb, struct rxe_av *av)
> {
> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return -EHOSTUNREACH;
> }
>
> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> + if (addr4_same_rxe(rxe, daddr))
> pkt->mask |= RXE_LOOPBACK_MASK;
>
> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return 0;
> }
>
> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> +{
> + struct inet6_dev *in6_dev;
> + struct inet6_ifaddr *ifp;
> + bool same_rxe = false;
> +
> + in6_dev = in6_dev_get(rxe->ndev);
> + if (!in6_dev)
> + return false;
> +
> + read_lock_bh(&in6_dev->lock);
> + list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> + same_rxe = true;
> + goto out;
> + }
> +out:
> + read_unlock_bh(&in6_dev->lock);
> + in6_dev_put(in6_dev);
> + return same_rxe;
> +}
> +
> static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> struct sk_buff *skb, struct rxe_av *av)
> {
> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return -EHOSTUNREACH;
> }
>
> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> + if (addr6_same_rxe(rxe, daddr))
> pkt->mask |= RXE_LOOPBACK_MASK;
>
> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> --
> 2.9.4
>

2017-07-26 19:57:00

by Yuval Shaia

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

> > + endfor_ifa(in_dev);
>
> The above endfor_ifa should move to below.

Please ignore, my mistake.

>
> > +out:

2017-07-26 19:57:48

by Yuval Shaia

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> Currently a packet is marked for loopback only if the source and
> destination address match. This is not enough when multiple
> gids are present in rxe's gid table and the traffic is
> from one gid to another.
>
> Fix it by marking the packet for loopback if the destination
> address appears in rxe's gid table.
>
> Signed-off-by: Marcel Apfelbaum <[email protected]>

Reviewed-by: Yuval Shaia <[email protected]>

Tested-by: Yuval Shaia <[email protected]>

> ---
> drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index c3a140e..b76a9a3 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
> ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
> }
>
> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> +{
> + struct in_device *in_dev;
> + bool same_rxe = false;
> +
> + rcu_read_lock();
> + in_dev = __in_dev_get_rcu(rxe->ndev);
> + if (!in_dev)
> + goto out;
> +
> + for_ifa(in_dev)
> + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> + same_rxe = true;
> + goto out;
> + }
> + endfor_ifa(in_dev);
> +out:
> + rcu_read_unlock();
> + return same_rxe;
> +}
> +
> static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> struct sk_buff *skb, struct rxe_av *av)
> {
> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return -EHOSTUNREACH;
> }
>
> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> + if (addr4_same_rxe(rxe, daddr))
> pkt->mask |= RXE_LOOPBACK_MASK;
>
> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return 0;
> }
>
> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> +{
> + struct inet6_dev *in6_dev;
> + struct inet6_ifaddr *ifp;
> + bool same_rxe = false;
> +
> + in6_dev = in6_dev_get(rxe->ndev);
> + if (!in6_dev)
> + return false;
> +
> + read_lock_bh(&in6_dev->lock);
> + list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> + same_rxe = true;
> + goto out;
> + }
> +out:
> + read_unlock_bh(&in6_dev->lock);
> + in6_dev_put(in6_dev);
> + return same_rxe;
> +}
> +
> static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> struct sk_buff *skb, struct rxe_av *av)
> {
> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return -EHOSTUNREACH;
> }
>
> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> + if (addr6_same_rxe(rxe, daddr))
> pkt->mask |= RXE_LOOPBACK_MASK;
>
> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> --
> 2.9.4
>

2017-07-27 07:04:40

by Moni Shoua

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On Wed, Jul 26, 2017 at 10:57 PM, Yuval Shaia <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
>> Currently a packet is marked for loopback only if the source and
>> destination address match. This is not enough when multiple
>> gids are present in rxe's gid table and the traffic is
>> from one gid to another.
>>
>> Fix it by marking the packet for loopback if the destination
>> address appears in rxe's gid table.
>>
>> Signed-off-by: Marcel Apfelbaum <[email protected]>
>
Have you considered using ip_route_output_key() for IPv4 or
ip6_route_output() for IPv6 to decide if this is a loopback?
For reference you can check the flow starting at rdma_resolve_ip()

2017-07-27 07:36:42

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> Currently a packet is marked for loopback only if the source and
> destination address match. This is not enough when multiple
> gids are present in rxe's gid table and the traffic is
> from one gid to another.
>
> Fix it by marking the packet for loopback if the destination
> address appears in rxe's gid table.
>
> Signed-off-by: Marcel Apfelbaum <[email protected]>
> ---
> drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index c3a140e..b76a9a3 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
> ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
> }
>
> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> +{

In addition to Moni's comment, no "inline" functions in *.c files, please.

> + struct in_device *in_dev;
> + bool same_rxe = false;
> +
> + rcu_read_lock();
> + in_dev = __in_dev_get_rcu(rxe->ndev);
> + if (!in_dev)
> + goto out;
> +
> + for_ifa(in_dev)
> + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> + same_rxe = true;
> + goto out;
> + }
> + endfor_ifa(in_dev);

I'm afraid that it will decrease performance drastically. One of the
possible solutions to overcome it, is to check the address of first packet
only, but it will work for RC only.

> +out:
> + rcu_read_unlock();
> + return same_rxe;
> +}
> +
> static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> struct sk_buff *skb, struct rxe_av *av)
> {
> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return -EHOSTUNREACH;
> }
>
> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> + if (addr4_same_rxe(rxe, daddr))
> pkt->mask |= RXE_LOOPBACK_MASK;
>
> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return 0;
> }
>
> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> +{

Ditto

> + struct inet6_dev *in6_dev;
> + struct inet6_ifaddr *ifp;
> + bool same_rxe = false;
> +
> + in6_dev = in6_dev_get(rxe->ndev);
> + if (!in6_dev)
> + return false;
> +
> + read_lock_bh(&in6_dev->lock);
> + list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> + same_rxe = true;
> + goto out;
> + }
> +out:
> + read_unlock_bh(&in6_dev->lock);
> + in6_dev_put(in6_dev);
> + return same_rxe;
> +}
> +
> static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> struct sk_buff *skb, struct rxe_av *av)
> {
> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> return -EHOSTUNREACH;
> }
>
> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> + if (addr6_same_rxe(rxe, daddr))
> pkt->mask |= RXE_LOOPBACK_MASK;
>
> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> --
> 2.9.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


Attachments:
(No filename) (3.32 kB)
signature.asc (833.00 B)
Download all attachments

2017-07-27 09:49:33

by Marcel Apfelbaum

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On 27/07/2017 10:36, Leon Romanovsky wrote:
> On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
>> Currently a packet is marked for loopback only if the source and
>> destination address match. This is not enough when multiple
>> gids are present in rxe's gid table and the traffic is
>> from one gid to another.
>>
>> Fix it by marking the packet for loopback if the destination
>> address appears in rxe's gid table.
>>
>> Signed-off-by: Marcel Apfelbaum <[email protected]>
>> ---
>> drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
>> 1 file changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
>> index c3a140e..b76a9a3 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
>> ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
>> }
>>
>> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
>> +{

Hi Leon,
Thanks for the review.

>
> In addition to Moni's comment, no "inline" functions in *.c files, please.
>

Sure, I simply followed the function on the same file:
static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
I even borrowed the name...

>> + struct in_device *in_dev;
>> + bool same_rxe = false;
>> +
>> + rcu_read_lock();
>> + in_dev = __in_dev_get_rcu(rxe->ndev);
>> + if (!in_dev)
>> + goto out;
>> +
>> + for_ifa(in_dev)
>> + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
>> + same_rxe = true;
>> + goto out;
>> + }
>> + endfor_ifa(in_dev);
>
> I'm afraid that it will decrease performance drastically. One of the
> possible solutions to overcome it, is to check the address of first packet
> only, but it will work for RC only.
>

How do you know is "the first" packet?
And yes, for UD the performance would decrease, but only
if the netdev has multiple IPs, right?

I'll ask on Moni's response mail for alternatives.

Thanks,
Marcel

>> +out:
>> + rcu_read_unlock();
>> + return same_rxe;
>> +}
>> +
>> static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>> struct sk_buff *skb, struct rxe_av *av)
>> {
>> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>> return -EHOSTUNREACH;
>> }
>>
>> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
>> + if (addr4_same_rxe(rxe, daddr))
>> pkt->mask |= RXE_LOOPBACK_MASK;
>>
>> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
>> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>> return 0;
>> }
>>
>> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
>> +{
>
> Ditto
>
>> + struct inet6_dev *in6_dev;
>> + struct inet6_ifaddr *ifp;
>> + bool same_rxe = false;
>> +
>> + in6_dev = in6_dev_get(rxe->ndev);
>> + if (!in6_dev)
>> + return false;
>> +
>> + read_lock_bh(&in6_dev->lock);
>> + list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
>> + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
>> + same_rxe = true;
>> + goto out;
>> + }
>> +out:
>> + read_unlock_bh(&in6_dev->lock);
>> + in6_dev_put(in6_dev);
>> + return same_rxe;
>> +}
>> +
>> static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>> struct sk_buff *skb, struct rxe_av *av)
>> {
>> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>> return -EHOSTUNREACH;
>> }
>>
>> - if (!memcmp(saddr, daddr, sizeof(*daddr)))
>> + if (addr6_same_rxe(rxe, daddr))
>> pkt->mask |= RXE_LOOPBACK_MASK;
>>
>> prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
>> --
>> 2.9.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html

2017-07-27 09:56:02

by Marcel Apfelbaum

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On 27/07/2017 10:04, Moni Shoua wrote:
> On Wed, Jul 26, 2017 at 10:57 PM, Yuval Shaia <[email protected]> wrote:
>> On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
>>> Currently a packet is marked for loopback only if the source and
>>> destination address match. This is not enough when multiple
>>> gids are present in rxe's gid table and the traffic is
>>> from one gid to another.
>>>
>>> Fix it by marking the packet for loopback if the destination
>>> address appears in rxe's gid table.
>>>
>>> Signed-off-by: Marcel Apfelbaum <[email protected]>
>>
> Have you considered using ip_route_output_key() for IPv4 or
> ip6_route_output() for IPv6 to decide if this is a loopback?
> For reference you can check the flow starting at rdma_resolve_ip()
>

Hi Moni,

Yes, I had looked into it, but I haven't seen how I can find
out if the destination IP belongs to the same RXE.
The loopback flag will give us the "same host"
confirmation, but not the same rxe instance, right?

Any ideas would be welcomed.

Thanks,
Marcel

2017-07-27 10:40:24

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On Thu, Jul 27, 2017 at 12:49:17PM +0300, Marcel Apfelbaum wrote:
> On 27/07/2017 10:36, Leon Romanovsky wrote:
> > On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> > > Currently a packet is marked for loopback only if the source and
> > > destination address match. This is not enough when multiple
> > > gids are present in rxe's gid table and the traffic is
> > > from one gid to another.
> > >
> > > Fix it by marking the packet for loopback if the destination
> > > address appears in rxe's gid table.
> > >
> > > Signed-off-by: Marcel Apfelbaum <[email protected]>
> > > ---
> > > drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
> > > 1 file changed, 45 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> > > index c3a140e..b76a9a3 100644
> > > --- a/drivers/infiniband/sw/rxe/rxe_net.c
> > > +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> > > @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
> > > ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
> > > }
> > >
> > > +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> > > +{
>
> Hi Leon,
> Thanks for the review.
>
> >
> > In addition to Moni's comment, no "inline" functions in *.c files, please.
> >
>
> Sure, I simply followed the function on the same file:
> static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
> I even borrowed the name...
>
> > > + struct in_device *in_dev;
> > > + bool same_rxe = false;
> > > +
> > > + rcu_read_lock();
> > > + in_dev = __in_dev_get_rcu(rxe->ndev);
> > > + if (!in_dev)
> > > + goto out;
> > > +
> > > + for_ifa(in_dev)
> > > + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> > > + same_rxe = true;
> > > + goto out;
> > > + }
> > > + endfor_ifa(in_dev);
> >
> > I'm afraid that it will decrease performance drastically. One of the
> > possible solutions to overcome it, is to check the address of first packet
> > only, but it will work for RC only.
> >
>
> How do you know is "the first" packet?
> And yes, for UD the performance would decrease, but only
> if the netdev has multiple IPs, right?

Yes, and first lookup for QP RC will be "first packet". QP RC are created with "static" address.

>
> I'll ask on Moni's response mail for alternatives.
>
> Thanks,
> Marcel
>
> > > +out:
> > > + rcu_read_unlock();
> > > + return same_rxe;
> > > +}
> > > +
> > > static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > > struct sk_buff *skb, struct rxe_av *av)
> > > {
> > > @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > > return -EHOSTUNREACH;
> > > }
> > >
> > > - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> > > + if (addr4_same_rxe(rxe, daddr))
> > > pkt->mask |= RXE_LOOPBACK_MASK;
> > >
> > > prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> > > @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > > return 0;
> > > }
> > >
> > > +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> > > +{
> >
> > Ditto
> >
> > > + struct inet6_dev *in6_dev;
> > > + struct inet6_ifaddr *ifp;
> > > + bool same_rxe = false;
> > > +
> > > + in6_dev = in6_dev_get(rxe->ndev);
> > > + if (!in6_dev)
> > > + return false;
> > > +
> > > + read_lock_bh(&in6_dev->lock);
> > > + list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> > > + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> > > + same_rxe = true;
> > > + goto out;
> > > + }
> > > +out:
> > > + read_unlock_bh(&in6_dev->lock);
> > > + in6_dev_put(in6_dev);
> > > + return same_rxe;
> > > +}
> > > +
> > > static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > > struct sk_buff *skb, struct rxe_av *av)
> > > {
> > > @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > > return -EHOSTUNREACH;
> > > }
> > >
> > > - if (!memcmp(saddr, daddr, sizeof(*daddr)))
> > > + if (addr6_same_rxe(rxe, daddr))
> > > pkt->mask |= RXE_LOOPBACK_MASK;
> > >
> > > prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> > > --
> > > 2.9.4
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> > > the body of a message to [email protected]
> > > More majordomo info at http://vger.kernel.org/majordomo-info.html
>


Attachments:
(No filename) (4.39 kB)
signature.asc (833.00 B)
Download all attachments

2017-07-27 13:48:11

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

Hi Marcel,

[auto build test ERROR on rdma/master]
[also build test ERROR on v4.13-rc2 next-20170727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Marcel-Apfelbaum/drivers-rxe-improve-rxe-loopback/20170727-211514
base: https://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git master
config: i386-randconfig-x002-07271954 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386

All error/warnings (new ones prefixed by >>):

drivers/infiniband/sw/rxe/rxe_net.c: In function 'addr6_same_rxe':
>> drivers/infiniband/sw/rxe/rxe_net.c:414:12: error: implicit declaration of function 'in6_dev_get' [-Werror=implicit-function-declaration]
in6_dev = in6_dev_get(rxe->ndev);
^~~~~~~~~~~
>> drivers/infiniband/sw/rxe/rxe_net.c:414:10: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
in6_dev = in6_dev_get(rxe->ndev);
^
>> drivers/infiniband/sw/rxe/rxe_net.c:426:2: error: implicit declaration of function 'in6_dev_put' [-Werror=implicit-function-declaration]
in6_dev_put(in6_dev);
^~~~~~~~~~~
cc1: some warnings being treated as errors

vim +/in6_dev_get +414 drivers/infiniband/sw/rxe/rxe_net.c

407
408 static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
409 {
410 struct inet6_dev *in6_dev;
411 struct inet6_ifaddr *ifp;
412 bool same_rxe = false;
413
> 414 in6_dev = in6_dev_get(rxe->ndev);
415 if (!in6_dev)
416 return false;
417
418 read_lock_bh(&in6_dev->lock);
419 list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
420 if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
421 same_rxe = true;
422 goto out;
423 }
424 out:
425 read_unlock_bh(&in6_dev->lock);
> 426 in6_dev_put(in6_dev);
427 return same_rxe;
428 }
429

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (2.15 kB)
.config.gz (28.10 kB)
Download all attachments

2017-07-30 09:57:06

by Moni Shoua

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

>> Have you considered using ip_route_output_key() for IPv4 or
>> ip6_route_output() for IPv6 to decide if this is a loopback?
>> For reference you can check the flow starting at rdma_resolve_ip()
>>
>
> Hi Moni,
>
> Yes, I had looked into it, but I haven't seen how I can find
> out if the destination IP belongs to the same RXE.
> The loopback flag will give us the "same host"
> confirmation, but not the same rxe instance, right?
>
> Any ideas would be welcomed.
>
> Thanks,
> Marcel
>
Hi Marcel

You are right about that. IFF_LOOPBACK tells you that the source and
destination addresses are on the same host but not necessarily on the
same RXE device.

As Leon mentioned, calling addrX_same_rxe() for each packet seems to
heavy , especially when the use case that justifies it (instead of
calling memcmp() on src and dst) is rare. Do you agree?
If so I think that marking a connection as loopback once is the right approach
For RC/UC - when modified to RTR
For UD - this is harder. IsLoopback() is function of the WQE (or at
least the QP and AH together( but not the QP. I think you can add an
improvement that will work for the majority of cases. This is a sketch
of what I have in mind. Let me know what you think please

1. Add bool last_used_qp to AH structure
2. Add bool is_loopback_with_last_qp to AH structure
3. Set values to AH.last_used_qp and AH.is_loopback_with_last_qp in
post_send() modify_ah(),...
4. Mark WQE as loopback depending on the above

2017-07-31 09:53:52

by Marcel Apfelbaum

[permalink] [raw]
Subject: Re: [PATCH] drivers/rxe: improve rxe loopback

On 30/07/2017 12:57, Moni Shoua wrote:
>>> Have you considered using ip_route_output_key() for IPv4 or
>>> ip6_route_output() for IPv6 to decide if this is a loopback?
>>> For reference you can check the flow starting at rdma_resolve_ip()
>>>
>>
>> Hi Moni,
>>
>> Yes, I had looked into it, but I haven't seen how I can find
>> out if the destination IP belongs to the same RXE.
>> The loopback flag will give us the "same host"
>> confirmation, but not the same rxe instance, right?
>>
>> Any ideas would be welcomed.
>>
>> Thanks,
>> Marcel
>>
> Hi Marcel
>

Hi Moni,

> You are right about that. IFF_LOOPBACK tells you that the source and
> destination addresses are on the same host but not necessarily on the
> same RXE device.
>
> As Leon mentioned, calling addrX_same_rxe() for each packet seems to
> heavy , especially when the use case that justifies it (instead of
> calling memcmp() on src and dst) is rare. Do you agree?

I do agree is rare, but is depending on use-case. And since it
is a bug we should fix it, but not on the expense of performance
of course.

> If so I think that marking a connection as loopback once is the right approach
> For RC/UC - when modified to RTR

Sounds good to me.

> For UD - this is harder. IsLoopback() is function of the WQE (or at
> least the QP and AH together( but not the QP. I think you can add an
> improvement that will work for the majority of cases. This is a sketch
> of what I have in mind. Let me know what you think please
>
> 1. Add bool last_used_qp to AH structure
> 2. Add bool is_loopback_with_last_qp to AH structure
> 3. Set values to AH.last_used_qp and AH.is_loopback_with_last_qp in
> post_send() modify_ah(),...
> 4. Mark WQE as loopback depending on the above
>

Your pointer is very much appreciated, I will look into it.

Thanks,
Marcel