If the netdevice is within a container and communicates externally
through network technologies like VXLAN, we won't be able to find
routing information in the init_net namespace. To address this issue,
we need to add a struct net parameter to the smc_ib_find_route function.
This allows us to locate the routing information within the corresponding
net namespace, ensuring the correct completion of the SMC CLC interaction.
Signed-off-by: Albert Huang <[email protected]>
---
net/smc/af_smc.c | 3 ++-
net/smc/smc_ib.c | 7 ++++---
net/smc/smc_ib.h | 2 +-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bacdd971615e..7a874da90c7f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
smc_get_clc_first_contact_ext(clc_v2, false);
+ struct net *net = sock_net(&smc->sk);
int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
ini->smcrv2.uses_gateway = false;
} else {
- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
ini->smcrv2.nexthop_mac,
&ini->smcrv2.uses_gateway))
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9b66d6aeeb1a..89981dbe46c9 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway)
{
struct neighbour *neigh = NULL;
@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
if (daddr == cpu_to_be32(INADDR_NONE))
goto out;
- rt = ip_route_output_flow(&init_net, &fl4, NULL);
+ rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
struct in_device *in_dev = __in_dev_get_rcu(ndev);
+ struct net *net = dev_net(ndev);
const struct in_ifaddr *ifa;
bool subnet_match = false;
@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
}
if (!subnet_match)
goto out;
- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
smcrv2->daddr,
smcrv2->nexthop_mac,
&smcrv2->uses_gateway))
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 4df5f8c8a0a1..ef8ac2b7546d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2);
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
--
2.37.1 (Apple Git-137.1)
On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
> If the netdevice is within a container and communicates externally
> through network technologies like VXLAN, we won't be able to find
> routing information in the init_net namespace. To address this issue,
> we need to add a struct net parameter to the smc_ib_find_route function.
> This allow us to locate the routing information within the corresponding
> net namespace, ensuring the correct completion of the SMC CLC interaction.
>
> Signed-off-by: Albert Huang <[email protected]>
> ---
> net/smc/af_smc.c | 3 ++-
> net/smc/smc_ib.c | 7 ++++---
> net/smc/smc_ib.h | 2 +-
> 3 files changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index bacdd971615e..7a874da90c7f 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> (struct smc_clc_msg_accept_confirm_v2 *)aclc;
> struct smc_clc_first_contact_ext *fce =
> smc_get_clc_first_contact_ext(clc_v2, false);
> + struct net *net = sock_net(&smc->sk);
> int rc;
>
> if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
> @@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
> ini->smcrv2.uses_gateway = false;
> } else {
> - if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
> + if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
> ini->smcrv2.nexthop_mac,
> &ini->smcrv2.uses_gateway))
> diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
> index 9b66d6aeeb1a..89981dbe46c9 100644
> --- a/net/smc/smc_ib.c
> +++ b/net/smc/smc_ib.c
> @@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
> return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
> }
>
> -int smc_ib_find_route(__be32 saddr, __be32 daddr,
> +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> u8 nexthop_mac[], u8 *uses_gateway)
> {
> struct neighbour *neigh = NULL;
> @@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
>
> if (daddr == cpu_to_be32(INADDR_NONE))
> goto out;
> - rt = ip_route_output_flow(&init_net, &fl4, NULL);
> + rt = ip_route_output_flow(net, &fl4, NULL);
This patch made me wonder, why doesn't SMC use RDMA-CM like all other
in-kernel ULPs which work over RDMA?
Thanks
On Tue, Sep 26, 2023 at 01:14:04PM +0200, Alexandra Winter wrote:
>
>
> On 26.09.23 12:48, Leon Romanovsky wrote:
> > This patch made me wonder, why doesn't SMC use RDMA-CM like all other
> > in-kernel ULPs which work over RDMA?
> >
> > Thanks
>
> The idea behind SMC is that it should look an feel to the applications
> like TCP sockets. So for connection management it uses TCP over IP;
> RDMA is just used for the data transfer.
I think that it is not different from other ULPs. For example, RDS works
over sockets and doesn't touch or reimplement GID management logic.
Thanks
On Tue, Sep 26, 2023 at 02:41:04PM +0300, Leon Romanovsky wrote:
>On Tue, Sep 26, 2023 at 01:14:04PM +0200, Alexandra Winter wrote:
>>
>>
>> On 26.09.23 12:48, Leon Romanovsky wrote:
>> > This patch made me wonder, why doesn't SMC use RDMA-CM like all other
>> > in-kernel ULPs which work over RDMA?
>> >
>> > Thanks
>>
>> The idea behind SMC is that it should look an feel to the applications
>> like TCP sockets. So for connection management it uses TCP over IP;
>> RDMA is just used for the data transfer.
>
>I think that it is not different from other ULPs. For example, RDS works
>over sockets and doesn't touch or reimplement GID management logic.
I think the difference is that an SMC socket needs to be compatible with a TCP
socket, so it needs a TCP socket to fall back to when something is not working.
If SMC worked with rdmacm, it would still need a fallback-to-TCP socket, and
the TCP connection would have to be established for each SMC socket before the
SMC socket gets established; that would make rdmacm meaningless.
Best regards,
Dust
>
>Thanks
On 26.09.23 12:48, Leon Romanovsky wrote:
> This patch made me wonder, why doesn't SMC use RDMA-CM like all other
> in-kernel ULPs which work over RDMA?
>
> Thanks
The idea behind SMC is that it should look and feel to the applications
like TCP sockets. So for connection management it uses TCP over IP;
RDMA is just used for the data transfer.
On Tue, Sep 26, 2023 at 08:09:03PM +0800, Dust Li wrote:
> On Tue, Sep 26, 2023 at 02:41:04PM +0300, Leon Romanovsky wrote:
> >On Tue, Sep 26, 2023 at 01:14:04PM +0200, Alexandra Winter wrote:
> >>
> >>
> >> On 26.09.23 12:48, Leon Romanovsky wrote:
> >> > This patch made me wonder, why doesn't SMC use RDMA-CM like all other
> >> > in-kernel ULPs which work over RDMA?
> >> >
> >> > Thanks
> >>
> >> The idea behind SMC is that it should look an feel to the applications
> >> like TCP sockets. So for connection management it uses TCP over IP;
> >> RDMA is just used for the data transfer.
> >
> >I think that it is not different from other ULPs. For example, RDS works
> >over sockets and doesn't touch or reimplement GID management logic.
>
> I think the difference is SMC socket need to be compatible with TCP
> socket, so it need a tcp socket to fallback when something is not working.
>
> If SMC works with rdmacm, it still need a fallback-to-tcp socket, and
> the tcp connection has to be established for each SMC socket before the
> SMC socket got established, that would make rdmacm meaningless.
You still need to perform device-GID-route translations [1], which sounds
very RDMA-CM to me. I'm not asking you to rewrite the code, but I am trying
to understand the rationale behind reimplementing part of the RDMA subsystem.
Thanks
[1] 24fb68111d45 ("net/smc: retrieve v2 gid from IB device")
>
> Best regards,
> Dust
>
> >
> >Thanks
On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
>If the netdevice is within a container and communicates externally
>through network technologies like VXLAN, we won't be able to find
>routing information in the init_net namespace. To address this issue,
Thanks for your finding!
I think this is a more generic problem, not just one related to VXLAN?
If we use SMC-R v2 and the netdevice is in a net namespace which is not
init_net, we should always fail, right? If so, I'd prefer this to be a bugfix.
Best regards,
Dust
>we need to add a struct net parameter to the smc_ib_find_route function.
>This allow us to locate the routing information within the corresponding
>net namespace, ensuring the correct completion of the SMC CLC interaction.
>
>Signed-off-by: Albert Huang <[email protected]>
>---
> net/smc/af_smc.c | 3 ++-
> net/smc/smc_ib.c | 7 ++++---
> net/smc/smc_ib.h | 2 +-
> 3 files changed, 7 insertions(+), 5 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index bacdd971615e..7a874da90c7f 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> (struct smc_clc_msg_accept_confirm_v2 *)aclc;
> struct smc_clc_first_contact_ext *fce =
> smc_get_clc_first_contact_ext(clc_v2, false);
>+ struct net *net = sock_net(&smc->sk);
> int rc;
>
> if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
>@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
> ini->smcrv2.uses_gateway = false;
> } else {
>- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
>+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
> ini->smcrv2.nexthop_mac,
> &ini->smcrv2.uses_gateway))
>diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
>index 9b66d6aeeb1a..89981dbe46c9 100644
>--- a/net/smc/smc_ib.c
>+++ b/net/smc/smc_ib.c
>@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
> return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
> }
>
>-int smc_ib_find_route(__be32 saddr, __be32 daddr,
>+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> u8 nexthop_mac[], u8 *uses_gateway)
> {
> struct neighbour *neigh = NULL;
>@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
>
> if (daddr == cpu_to_be32(INADDR_NONE))
> goto out;
>- rt = ip_route_output_flow(&init_net, &fl4, NULL);
>+ rt = ip_route_output_flow(net, &fl4, NULL);
> if (IS_ERR(rt))
> goto out;
> if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
>@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
> smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
> struct in_device *in_dev = __in_dev_get_rcu(ndev);
>+ struct net *net = dev_net(ndev);
> const struct in_ifaddr *ifa;
> bool subnet_match = false;
>
>@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> }
> if (!subnet_match)
> goto out;
>- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
>+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
> smcrv2->daddr,
> smcrv2->nexthop_mac,
> &smcrv2->uses_gateway))
>diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
>index 4df5f8c8a0a1..ef8ac2b7546d 100644
>--- a/net/smc/smc_ib.h
>+++ b/net/smc/smc_ib.h
>@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
> int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
> unsigned short vlan_id, u8 gid[], u8 *sgid_index,
> struct smc_init_info_smcrv2 *smcrv2);
>-int smc_ib_find_route(__be32 saddr, __be32 daddr,
>+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> u8 nexthop_mac[], u8 *uses_gateway);
> bool smc_ib_is_valid_local_systemid(void);
> int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
>--
>2.37.1 (Apple Git-137.1)
On Wed, Sep 27, 2023 at 11:42:09AM +0800, Dust Li wrote:
> On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
> >If the netdevice is within a container and communicates externally
> >through network technologies like VXLAN, we won't be able to find
> >routing information in the init_net namespace. To address this issue,
>
> Thanks for your founding !
>
> I think this is a more generic problem, but not just related to VXLAN ?
> If we use SMC-R v2 and the netdevice is in a net namespace which is not
> init_net, we should always fail, right ? If so, I'd prefer this to be a bugfix.
BTW, does this patch take into account net namespace of ib_device?
Thanks
>
> Best regards,
> Dust
>
> >we need to add a struct net parameter to the smc_ib_find_route function.
> >This allow us to locate the routing information within the corresponding
> >net namespace, ensuring the correct completion of the SMC CLC interaction.
> >
> >Signed-off-by: Albert Huang <[email protected]>
> >---
> > net/smc/af_smc.c | 3 ++-
> > net/smc/smc_ib.c | 7 ++++---
> > net/smc/smc_ib.h | 2 +-
> > 3 files changed, 7 insertions(+), 5 deletions(-)
> >
> >diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> >index bacdd971615e..7a874da90c7f 100644
> >--- a/net/smc/af_smc.c
> >+++ b/net/smc/af_smc.c
> >@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> > (struct smc_clc_msg_accept_confirm_v2 *)aclc;
> > struct smc_clc_first_contact_ext *fce =
> > smc_get_clc_first_contact_ext(clc_v2, false);
> >+ struct net *net = sock_net(&smc->sk);
> > int rc;
> >
> > if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
> >@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> > memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
> > ini->smcrv2.uses_gateway = false;
> > } else {
> >- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
> >+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> > smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
> > ini->smcrv2.nexthop_mac,
> > &ini->smcrv2.uses_gateway))
> >diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
> >index 9b66d6aeeb1a..89981dbe46c9 100644
> >--- a/net/smc/smc_ib.c
> >+++ b/net/smc/smc_ib.c
> >@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
> > return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
> > }
> >
> >-int smc_ib_find_route(__be32 saddr, __be32 daddr,
> >+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> > u8 nexthop_mac[], u8 *uses_gateway)
> > {
> > struct neighbour *neigh = NULL;
> >@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
> >
> > if (daddr == cpu_to_be32(INADDR_NONE))
> > goto out;
> >- rt = ip_route_output_flow(&init_net, &fl4, NULL);
> >+ rt = ip_route_output_flow(net, &fl4, NULL);
> > if (IS_ERR(rt))
> > goto out;
> > if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
> >@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> > if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
> > smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
> > struct in_device *in_dev = __in_dev_get_rcu(ndev);
> >+ struct net *net = dev_net(ndev);
> > const struct in_ifaddr *ifa;
> > bool subnet_match = false;
> >
> >@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> > }
> > if (!subnet_match)
> > goto out;
> >- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
> >+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
> > smcrv2->daddr,
> > smcrv2->nexthop_mac,
> > &smcrv2->uses_gateway))
> >diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
> >index 4df5f8c8a0a1..ef8ac2b7546d 100644
> >--- a/net/smc/smc_ib.h
> >+++ b/net/smc/smc_ib.h
> >@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
> > int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
> > unsigned short vlan_id, u8 gid[], u8 *sgid_index,
> > struct smc_init_info_smcrv2 *smcrv2);
> >-int smc_ib_find_route(__be32 saddr, __be32 daddr,
> >+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> > u8 nexthop_mac[], u8 *uses_gateway);
> > bool smc_ib_is_valid_local_systemid(void);
> > int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
> >--
> >2.37.1 (Apple Git-137.1)
>
On Wed, Sep 27, 2023 at 08:55:28AM +0300, Leon Romanovsky wrote:
>On Wed, Sep 27, 2023 at 11:42:09AM +0800, Dust Li wrote:
>> On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
>> >If the netdevice is within a container and communicates externally
>> >through network technologies like VXLAN, we won't be able to find
>> >routing information in the init_net namespace. To address this issue,
>>
>> Thanks for your founding !
>>
>> I think this is a more generic problem, but not just related to VXLAN ?
>> If we use SMC-R v2 and the netdevice is in a net namespace which is not
>> init_net, we should always fail, right ? If so, I'd prefer this to be a bugfix.
>
>BTW, does this patch take into account net namespace of ib_device?
I think this patch is unrelated to the netns of the ib_device.
SMC has a global smc_ib_devices list reported by ib_client, and checks
the netns using rdma_dev_access_netns. So I think we already handle
that well.
Best regards,
Dust
>
>Thanks
>
>>
>> Best regards,
>> Dust
>>
>> >we need to add a struct net parameter to the smc_ib_find_route function.
>> >This allow us to locate the routing information within the corresponding
>> >net namespace, ensuring the correct completion of the SMC CLC interaction.
>> >
>> >Signed-off-by: Albert Huang <[email protected]>
>> >---
>> > net/smc/af_smc.c | 3 ++-
>> > net/smc/smc_ib.c | 7 ++++---
>> > net/smc/smc_ib.h | 2 +-
>> > 3 files changed, 7 insertions(+), 5 deletions(-)
>> >
>> >diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> >index bacdd971615e..7a874da90c7f 100644
>> >--- a/net/smc/af_smc.c
>> >+++ b/net/smc/af_smc.c
>> >@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
>> > (struct smc_clc_msg_accept_confirm_v2 *)aclc;
>> > struct smc_clc_first_contact_ext *fce =
>> > smc_get_clc_first_contact_ext(clc_v2, false);
>> >+ struct net *net = sock_net(&smc->sk);
>> > int rc;
>> >
>> > if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
>> >@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
>> > memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
>> > ini->smcrv2.uses_gateway = false;
>> > } else {
>> >- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
>> >+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
>> > smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
>> > ini->smcrv2.nexthop_mac,
>> > &ini->smcrv2.uses_gateway))
>> >diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
>> >index 9b66d6aeeb1a..89981dbe46c9 100644
>> >--- a/net/smc/smc_ib.c
>> >+++ b/net/smc/smc_ib.c
>> >@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
>> > return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
>> > }
>> >
>> >-int smc_ib_find_route(__be32 saddr, __be32 daddr,
>> >+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
>> > u8 nexthop_mac[], u8 *uses_gateway)
>> > {
>> > struct neighbour *neigh = NULL;
>> >@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
>> >
>> > if (daddr == cpu_to_be32(INADDR_NONE))
>> > goto out;
>> >- rt = ip_route_output_flow(&init_net, &fl4, NULL);
>> >+ rt = ip_route_output_flow(net, &fl4, NULL);
>> > if (IS_ERR(rt))
>> > goto out;
>> > if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
>> >@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
>> > if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
>> > smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
>> > struct in_device *in_dev = __in_dev_get_rcu(ndev);
>> >+ struct net *net = dev_net(ndev);
>> > const struct in_ifaddr *ifa;
>> > bool subnet_match = false;
>> >
>> >@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
>> > }
>> > if (!subnet_match)
>> > goto out;
>> >- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
>> >+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
>> > smcrv2->daddr,
>> > smcrv2->nexthop_mac,
>> > &smcrv2->uses_gateway))
>> >diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
>> >index 4df5f8c8a0a1..ef8ac2b7546d 100644
>> >--- a/net/smc/smc_ib.h
>> >+++ b/net/smc/smc_ib.h
>> >@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
>> > int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
>> > unsigned short vlan_id, u8 gid[], u8 *sgid_index,
>> > struct smc_init_info_smcrv2 *smcrv2);
>> >-int smc_ib_find_route(__be32 saddr, __be32 daddr,
>> >+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
>> > u8 nexthop_mac[], u8 *uses_gateway);
>> > bool smc_ib_is_valid_local_systemid(void);
>> > int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
>> >--
>> >2.37.1 (Apple Git-137.1)
>>
Leon Romanovsky <[email protected]> 于2023年9月27日周三 13:55写道:
>
> On Wed, Sep 27, 2023 at 11:42:09AM +0800, Dust Li wrote:
> > On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
> > >If the netdevice is within a container and communicates externally
> > >through network technologies like VXLAN, we won't be able to find
> > >routing information in the init_net namespace. To address this issue,
> >
> > Thanks for your founding !
> >
> > I think this is a more generic problem, but not just related to VXLAN ?
> > If we use SMC-R v2 and the netdevice is in a net namespace which is not
> > init_net, we should always fail, right ? If so, I'd prefer this to be a bugfix.
>
> BTW, does this patch take into account net namespace of ib_device?
>
> Thanks
>
As Dust said, the ib_device case already works well:
/*
 * rdma_dev_access_netns() - compare an ib_device's net namespace with @net.
 * @dev: device whose recorded namespace (dev->coredev.rdma_net) is checked
 * @net: namespace to compare against
 *
 * Returns true when ib_devices_shared_netns is set, or when the namespace
 * stored in dev->coredev.rdma_net is equal to @net; false otherwise.
 */
bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
return (ib_devices_shared_netns ||
net_eq(read_pnet(&dev->coredev.rdma_net), net));
}
EXPORT_SYMBOL(rdma_dev_access_netns);
thanks!
BR
Albert.
> >
> > Best regards,
> > Dust
> >
> > >we need to add a struct net parameter to the smc_ib_find_route function.
> > >This allow us to locate the routing information within the corresponding
> > >net namespace, ensuring the correct completion of the SMC CLC interaction.
> > >
> > >Signed-off-by: Albert Huang <[email protected]>
> > >---
> > > net/smc/af_smc.c | 3 ++-
> > > net/smc/smc_ib.c | 7 ++++---
> > > net/smc/smc_ib.h | 2 +-
> > > 3 files changed, 7 insertions(+), 5 deletions(-)
> > >
> > >diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> > >index bacdd971615e..7a874da90c7f 100644
> > >--- a/net/smc/af_smc.c
> > >+++ b/net/smc/af_smc.c
> > >@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> > > (struct smc_clc_msg_accept_confirm_v2 *)aclc;
> > > struct smc_clc_first_contact_ext *fce =
> > > smc_get_clc_first_contact_ext(clc_v2, false);
> > >+ struct net *net = sock_net(&smc->sk);
> > > int rc;
> > >
> > > if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
> > >@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> > > memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
> > > ini->smcrv2.uses_gateway = false;
> > > } else {
> > >- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
> > >+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> > > smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
> > > ini->smcrv2.nexthop_mac,
> > > &ini->smcrv2.uses_gateway))
> > >diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
> > >index 9b66d6aeeb1a..89981dbe46c9 100644
> > >--- a/net/smc/smc_ib.c
> > >+++ b/net/smc/smc_ib.c
> > >@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
> > > return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
> > > }
> > >
> > >-int smc_ib_find_route(__be32 saddr, __be32 daddr,
> > >+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> > > u8 nexthop_mac[], u8 *uses_gateway)
> > > {
> > > struct neighbour *neigh = NULL;
> > >@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
> > >
> > > if (daddr == cpu_to_be32(INADDR_NONE))
> > > goto out;
> > >- rt = ip_route_output_flow(&init_net, &fl4, NULL);
> > >+ rt = ip_route_output_flow(net, &fl4, NULL);
> > > if (IS_ERR(rt))
> > > goto out;
> > > if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
> > >@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> > > if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
> > > smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
> > > struct in_device *in_dev = __in_dev_get_rcu(ndev);
> > >+ struct net *net = dev_net(ndev);
> > > const struct in_ifaddr *ifa;
> > > bool subnet_match = false;
> > >
> > >@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> > > }
> > > if (!subnet_match)
> > > goto out;
> > >- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
> > >+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
> > > smcrv2->daddr,
> > > smcrv2->nexthop_mac,
> > > &smcrv2->uses_gateway))
> > >diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
> > >index 4df5f8c8a0a1..ef8ac2b7546d 100644
> > >--- a/net/smc/smc_ib.h
> > >+++ b/net/smc/smc_ib.h
> > >@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
> > > int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
> > > unsigned short vlan_id, u8 gid[], u8 *sgid_index,
> > > struct smc_init_info_smcrv2 *smcrv2);
> > >-int smc_ib_find_route(__be32 saddr, __be32 daddr,
> > >+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> > > u8 nexthop_mac[], u8 *uses_gateway);
> > > bool smc_ib_is_valid_local_systemid(void);
> > > int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
> > >--
> > >2.37.1 (Apple Git-137.1)
> >
On Wed, Sep 27, 2023 at 08:17:40PM +0800, Dust Li wrote:
> On Wed, Sep 27, 2023 at 08:55:28AM +0300, Leon Romanovsky wrote:
> >On Wed, Sep 27, 2023 at 11:42:09AM +0800, Dust Li wrote:
> >> On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
> >> >If the netdevice is within a container and communicates externally
> >> >through network technologies like VXLAN, we won't be able to find
> >> >routing information in the init_net namespace. To address this issue,
> >>
> >> Thanks for your founding !
> >>
> >> I think this is a more generic problem, but not just related to VXLAN ?
> >> If we use SMC-R v2 and the netdevice is in a net namespace which is not
> >> init_net, we should always fail, right ? If so, I'd prefer this to be a bugfix.
> >
> >BTW, does this patch take into account net namespace of ib_device?
>
> I think this patch is irrelevant with the netns of ib_device.
>
> SMC has a global smc_ib_devices list reported by ib_client, and checked
> the netns using rdma_dev_access_netns. So I think we should have handled
> that well.
ok, I see
Thanks,
Reviewed-by: Leon Romanovsky <[email protected]>
On Mon, 2023-09-25 at 10:35 +0800, Albert Huang wrote:
> If the netdevice is within a container and communicates externally
> through network technologies like VXLAN, we won't be able to find
> routing information in the init_net namespace. To address this issue,
> we need to add a struct net parameter to the smc_ib_find_route function.
> This allow us to locate the routing information within the corresponding
> net namespace, ensuring the correct completion of the SMC CLC interaction.
>
> Signed-off-by: Albert Huang <[email protected]>
> ---
> net/smc/af_smc.c | 3 ++-
> net/smc/smc_ib.c | 7 ++++---
> net/smc/smc_ib.h | 2 +-
> 3 files changed, 7 insertions(+), 5 deletions(-)
>
I'm trying to test this patch on s390x but I'm running into the same
issue I ran into with the original SMC namespace
support: https://lore.kernel.org/netdev/[email protected]/
Just like back then I'm using a server and a client network namespace
on the same system with two ConnectX-4 VFs from the same card and port.
Both TCP/IP traffic as well as user-space RDMA via "qperf … rc_bw" and
`qperf … rc_lat` work between namespaces and definitely go via the
card.
I did use "rdma system set netns exclusive" then moved the RDMA devices
into the namespaces with "rdma dev set <rdma_dev> netns <namespace>". I
also verified with "ip netns exec <namespace> rdma dev"
that the RDMA devices are in the network namespace and as seen by the
qperf runs normal RDMA does work.
For reference the smc_chck tool gives me the following output:
Server started on port 37373
[DEBUG] Interfaces to check: eno4378
Test with target IP 10.10.93.12 and port 37373
Live test (SMC-D and SMC-R)
[DEBUG] Running client: smc_run /tmp/echo-clt.x0q8iO 10.10.93.12 -p
37373
[DEBUG] Client result: TCP 0x05000000/0x03030000
Failed (TCP fallback), reasons:
Client: 0x05000000 Peer declined during handshake
Server: 0x03030000 No SMC devices found (R and D)
I also checked that SMC is generally working, once I add an ISM device
I do get SMC-D between the namespaces. Any ideas what could break SMC-R
here?
Thanks,
Niklas
On Wed, 2023-09-27 at 11:42 +0800, Dust Li wrote:
> On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
> > If the netdevice is within a container and communicates externally
> > through network technologies like VXLAN, we won't be able to find
> > routing information in the init_net namespace. To address this issue,
>
> Thanks for your founding !
>
> I think this is a more generic problem, but not just related to VXLAN ?
> If we use SMC-R v2 and the netdevice is in a net namespace which is not
> init_net, we should always fail, right ? If so, I'd prefer this to be a bugfix.
Restating the above to be on the same page: the patch should be reposted
targeting the net tree, and should include a suitable Fixes: tag.
@Dust Li: please correct me if I misread you.
Thanks,
Paolo
On Tue, Oct 03, 2023 at 12:41:25PM +0200, Paolo Abeni wrote:
>On Wed, 2023-09-27 at 11:42 +0800, Dust Li wrote:
>> On Mon, Sep 25, 2023 at 10:35:45AM +0800, Albert Huang wrote:
>> > If the netdevice is within a container and communicates externally
>> > through network technologies like VXLAN, we won't be able to find
>> > routing information in the init_net namespace. To address this issue,
>>
>> Thanks for your founding !
>>
>> I think this is a more generic problem, but not just related to VXLAN ?
>> If we use SMC-R v2 and the netdevice is in a net namespace which is not
>> init_net, we should always fail, right ? If so, I'd prefer this to be a bugfix.
>
>Re-stating the above to be on the same page: the patch should be re-
>posted targeting the net tree, and including a suitable fixes tag.
>
>@Dust Li: please correct me if I misread you.
Right, this is exactly what I mean.
Best regards,
Dust
>
>Thanks,
>
>Paolo
On Thu, Sep 28, 2023 at 05:04:21PM +0200, Niklas Schnelle wrote:
>On Mon, 2023-09-25 at 10:35 +0800, Albert Huang wrote:
>> If the netdevice is within a container and communicates externally
>> through network technologies like VXLAN, we won't be able to find
>> routing information in the init_net namespace. To address this issue,
>> we need to add a struct net parameter to the smc_ib_find_route function.
>> This allow us to locate the routing information within the corresponding
>> net namespace, ensuring the correct completion of the SMC CLC interaction.
>>
>> Signed-off-by: Albert Huang <[email protected]>
>> ---
>> net/smc/af_smc.c | 3 ++-
>> net/smc/smc_ib.c | 7 ++++---
>> net/smc/smc_ib.h | 2 +-
>> 3 files changed, 7 insertions(+), 5 deletions(-)
>>
>
>I'm trying to test this patch on s390x but I'm running into the same
>issue I ran into with the original SMC namespace
>support:https://lore.kernel.org/netdev/[email protected]/
>
>Just like back then I'm using a server and a client network namespace
>on the same system with two ConnectX-4 VFs from the same card and port.
>Both TCP/IP traffic as well as user-space RDMA via "qperf … rc_bw" and
>`qperf … rc_lat` work between namespaces and definitely go via the
>card.
>
>I did use "rdma system set netns exclusive" then moved the RDMA devices
>into the namespaces with "rdma dev set <rdma_dev> netns <namespace>". I
>also verified with "ip netns exec <namespace> rdma dev"
>that the RDMA devices are in the network namespace and as seen by the
>qperf runs normal RDMA does work.
>
>For reference the smc_chck tool gives me the following output:
>
>Server started on port 37373
>[DEBUG] Interfaces to check: eno4378
>Test with target IP 10.10.93.12 and port 37373
> Live test (SMC-D and SMC-R)
>[DEBUG] Running client: smc_run /tmp/echo-clt.x0q8iO 10.10.93.12 -p
>37373
>[DEBUG] Client result: TCP 0x05000000/0x03030000
> Failed (TCP fallback), reasons:
> Client: 0x05000000 Peer declined during handshake
> Server: 0x03030000 No SMC devices found (R and D)
>
>I also checked that SMC is generally working, once I add an ISM device
>I do get SMC-D between the namespaces. Any ideas what could break SMC-R
>here?
I missed the email :(
Are you running SMC-Rv2 or v1 ?
Best regards,
Dust
>
>Thanks,
>Niklas
On Wed, Oct 11, 2023 at 10:48:16PM +0800, Dust Li wrote:
>On Thu, Sep 28, 2023 at 05:04:21PM +0200, Niklas Schnelle wrote:
>>On Mon, 2023-09-25 at 10:35 +0800, Albert Huang wrote:
>>> If the netdevice is within a container and communicates externally
>>> through network technologies like VXLAN, we won't be able to find
>>> routing information in the init_net namespace. To address this issue,
>>> we need to add a struct net parameter to the smc_ib_find_route function.
>>> This allow us to locate the routing information within the corresponding
>>> net namespace, ensuring the correct completion of the SMC CLC interaction.
>>>
>>> Signed-off-by: Albert Huang <[email protected]>
>>> ---
>>> net/smc/af_smc.c | 3 ++-
>>> net/smc/smc_ib.c | 7 ++++---
>>> net/smc/smc_ib.h | 2 +-
>>> 3 files changed, 7 insertions(+), 5 deletions(-)
>>>
>>
>>I'm trying to test this patch on s390x but I'm running into the same
>>issue I ran into with the original SMC namespace
>>support:https://lore.kernel.org/netdev/[email protected]/
>>
>>Just like back then I'm using a server and a client network namespace
>>on the same system with two ConnectX-4 VFs from the same card and port.
>>Both TCP/IP traffic as well as user-space RDMA via "qperf … rc_bw" and
>>`qperf … rc_lat` work between namespaces and definitely go via the
>>card.
>>
>>I did use "rdma system set netns exclusive" then moved the RDMA devices
>>into the namespaces with "rdma dev set <rdma_dev> netns <namespace>". I
>>also verified with "ip netns exec <namespace> rdma dev"
>>that the RDMA devices are in the network namespace and as seen by the
>>qperf runs normal RDMA does work.
>>
>>For reference the smc_chck tool gives me the following output:
>>
>>Server started on port 37373
>>[DEBUG] Interfaces to check: eno4378
>>Test with target IP 10.10.93.12 and port 37373
>> Live test (SMC-D and SMC-R)
>>[DEBUG] Running client: smc_run /tmp/echo-clt.x0q8iO 10.10.93.12 -p
>>37373
>>[DEBUG] Client result: TCP 0x05000000/0x03030000
>> Failed (TCP fallback), reasons:
>> Client: 0x05000000 Peer declined during handshake
>> Server: 0x03030000 No SMC devices found (R and D)
>>
>>I also checked that SMC is generally working, once I add an ISM device
>>I do get SMC-D between the namespaces. Any ideas what could break SMC-R
>>here?
>
>I missed the email :(
>
>Are you running SMC-Rv2 or v1 ?
Hi Niklas,
I tried your test today, and I encountered the same issue.
But I found it's because my 2 VFs are in different subnets:
SMC-Rv2 works fine, SMC-Rv1 won't work, which is expected.
When I set the 2 VFs in the same subnet, SMC-Rv1 also works.
So I'm not sure it's the same for you. Can you check it out?
BTW, the fallback reason (SMC_CLC_DECL_NOSMCDEV) in this case
is really not friendly; it would be better to return SMC_CLC_DECL_DIFFPREFIX.
Best regards,
Dust
>
>Best regards,
>Dust
>
>
>>
>>Thanks,
>>Niklas
On 12.10.23 14:17, Dust Li wrote:
> On Wed, Oct 11, 2023 at 10:48:16PM +0800, Dust Li wrote:
>> On Thu, Sep 28, 2023 at 05:04:21PM +0200, Niklas Schnelle wrote:
>>> On Mon, 2023-09-25 at 10:35 +0800, Albert Huang wrote:
>>>> If the netdevice is within a container and communicates externally
>>>> through network technologies like VXLAN, we won't be able to find
>>>> routing information in the init_net namespace. To address this issue,
>>>> we need to add a struct net parameter to the smc_ib_find_route function.
>>>> This allow us to locate the routing information within the corresponding
>>>> net namespace, ensuring the correct completion of the SMC CLC interaction.
>>>>
>>>> Signed-off-by: Albert Huang <[email protected]>
>>>> ---
>>>> net/smc/af_smc.c | 3 ++-
>>>> net/smc/smc_ib.c | 7 ++++---
>>>> net/smc/smc_ib.h | 2 +-
>>>> 3 files changed, 7 insertions(+), 5 deletions(-)
>>>>
>>>
>>> I'm trying to test this patch on s390x but I'm running into the same
>>> issue I ran into with the original SMC namespace
>>> support:https://lore.kernel.org/netdev/[email protected]/
>>>
>>> Just like back then I'm using a server and a client network namespace
>>> on the same system with two ConnectX-4 VFs from the same card and port.
>>> Both TCP/IP traffic as well as user-space RDMA via "qperf … rc_bw" and
>>> `qperf … rc_lat` work between namespaces and definitely go via the
>>> card.
>>>
>>> I did use "rdma system set netns exclusive" then moved the RDMA devices
>>> into the namespaces with "rdma dev set <rdma_dev> netns <namespace>". I
>>> also verified with "ip netns exec <namespace> rdma dev"
>>> that the RDMA devices are in the network namespace and as seen by the
>>> qperf runs normal RDMA does work.
>>>
>>> For reference the smc_chck tool gives me the following output:
>>>
>>> Server started on port 37373
>>> [DEBUG] Interfaces to check: eno4378
>>> Test with target IP 10.10.93.12 and port 37373
>>> Live test (SMC-D and SMC-R)
>>> [DEBUG] Running client: smc_run /tmp/echo-clt.x0q8iO 10.10.93.12 -p
>>> 37373
>>> [DEBUG] Client result: TCP 0x05000000/0x03030000
>>> Failed (TCP fallback), reasons:
>>> Client: 0x05000000 Peer declined during handshake
>>> Server: 0x03030000 No SMC devices found (R and D)
>>>
>>> I also checked that SMC is generally working, once I add an ISM device
>>> I do get SMC-D between the namespaces. Any ideas what could break SMC-R
>>> here?
>>
>> I missed the email :(
>>
>> Are you running SMC-Rv2 or v1 ?
>
> Hi Niklas,
>
> I tried your test today, and I encounter the same issue.
> But I found it's because my 2 VFs are in difference subnets,
> SMC-Rv2 work fine, SMC-Rv1 won't work, which is expected.
> When I set the 2 VFs in the same subnet, SMC-Rv1 also works.
>
> So I'm not sure it's the same for you. Can you check it out ?
>
> BTW, the fallback reason(SMC_CLC_DECL_NOSMCDEV) in this case
> is really not friendly, it's better to return SMC_CLC_DECL_DIFFPREFIX.
>
> Best regards,
> Dust
>
Thank you, Dust, for trying it out!
The reason code SMC_CLC_DECL_NOSMCDEV there could really be
misleading.
>
>>
>> Best regards,
>> Dust
>>
>>
>>>
>>> Thanks,
>>> Niklas
On Thu, 2023-10-12 at 20:17 +0800, Dust Li wrote:
> On Wed, Oct 11, 2023 at 10:48:16PM +0800, Dust Li wrote:
> > On Thu, Sep 28, 2023 at 05:04:21PM +0200, Niklas Schnelle wrote:
> > > On Mon, 2023-09-25 at 10:35 +0800, Albert Huang wrote:
> > > > If the netdevice is within a container and communicates externally
> > > > through network technologies like VXLAN, we won't be able to find
> > > > routing information in the init_net namespace. To address this issue,
> > > > we need to add a struct net parameter to the smc_ib_find_route function.
> > > > This allow us to locate the routing information within the corresponding
> > > > net namespace, ensuring the correct completion of the SMC CLC interaction.
> > > >
> > > > Signed-off-by: Albert Huang <[email protected]>
> > > > ---
> > > > net/smc/af_smc.c | 3 ++-
> > > > net/smc/smc_ib.c | 7 ++++---
> > > > net/smc/smc_ib.h | 2 +-
> > > > 3 files changed, 7 insertions(+), 5 deletions(-)
> > > >
> > >
> > > I'm trying to test this patch on s390x but I'm running into the same
> > > issue I ran into with the original SMC namespace
> > > support:https://lore.kernel.org/netdev/[email protected]/
> > >
> > > Just like back then I'm using a server and a client network namespace
> > > on the same system with two ConnectX-4 VFs from the same card and port.
> > > Both TCP/IP traffic as well as user-space RDMA via "qperf … rc_bw" and
> > > `qperf … rc_lat` work between namespaces and definitely go via the
> > > card.
> > >
> > > I did use "rdma system set netns exclusive" then moved the RDMA devices
> > > into the namespaces with "rdma dev set <rdma_dev> netns <namespace>". I
> > > also verified with "ip netns exec <namespace> rdma dev"
> > > that the RDMA devices are in the network namespace and as seen by the
> > > qperf runs normal RDMA does work.
> > >
> > > For reference the smc_chck tool gives me the following output:
> > >
> > > Server started on port 37373
> > > [DEBUG] Interfaces to check: eno4378
> > > Test with target IP 10.10.93.12 and port 37373
> > > Live test (SMC-D and SMC-R)
> > > [DEBUG] Running client: smc_run /tmp/echo-clt.x0q8iO 10.10.93.12 -p
> > > 37373
> > > [DEBUG] Client result: TCP 0x05000000/0x03030000
> > > Failed (TCP fallback), reasons:
> > > Client: 0x05000000 Peer declined during handshake
> > > Server: 0x03030000 No SMC devices found (R and D)
> > >
> > > I also checked that SMC is generally working, once I add an ISM device
> > > I do get SMC-D between the namespaces. Any ideas what could break SMC-R
> > > here?
> >
> > I missed the email :(
> >
> > Are you running SMC-Rv2 or v1 ?
>
> Hi Niklas,
>
> I tried your test today, and I encounter the same issue.
> But I found it's because my 2 VFs are in difference subnets,
> SMC-Rv2 work fine, SMC-Rv1 won't work, which is expected.
> When I set the 2 VFs in the same subnet, SMC-Rv1 also works.
>
> So I'm not sure it's the same for you. Can you check it out ?
>
> BTW, the fallback reason(SMC_CLC_DECL_NOSMCDEV) in this case
> is really not friendly, it's better to return SMC_CLC_DECL_DIFFPREFIX.
>
> Best regards,
> Dust
I think you are right. I did use two consecutive private IPs, but I had
set the subnet mask to /32. After setting that to /16, the SMC-R connection
is established. I'll work with Wenjia and Jan on why my system is
defaulting to SMC-Rv1; I would have hoped to get SMC-Rv2.
Thanks for your insights!
Niklas