Recently we run a network test over ipcomp virtual tunnel.We find that
if a ipv4 packet needs fragment, then the peer can't receive
it.
We deep into the code and find that when packet need fragment the smaller
fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
error.
This patch adds compatible support for the ipip process in ipcomp virtual tunnel.
Signed-off-by: Su Yanjun <[email protected]>
---
net/ipv4/ip_vti.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index de31b30..63de2f6 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ if (iph->protocol == IPPROTO_IPIP)
+ skb->dev = tunnel->dev;
+
return xfrm_input(skb, nexthdr, spi, encap_type);
}
@@ -76,10 +79,15 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
static int vti_rcv(struct sk_buff *skb)
{
+ __be32 spi = 0;
+
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ if (ip_hdr(skb)->protocol == IPPROTO_IPIP)
+ spi = ip_hdr(skb)->saddr;
- return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+ return vti_input(skb, ip_hdr(skb)->protocol, spi, 0);
}
static int vti_rcv_cb(struct sk_buff *skb, int err)
@@ -429,6 +437,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
.priority = 100,
};
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+ .handler = vti_rcv,
+ .err_handler = vti4_err,
+ .priority = 0,
+};
+
static int __net_init vti_init_net(struct net *net)
{
int err;
@@ -597,6 +611,13 @@ static int __init vti_init(void)
if (err < 0)
goto xfrm_proto_comp_failed;
+ msg = "ipip tunnel";
+ err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+ if (err < 0) {
+ pr_info("%s: cant't register tunnel\n",__func__);
+ goto xfrm_tunnel_failed;
+ }
+
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
if (err < 0)
@@ -606,6 +627,8 @@ static int __init vti_init(void)
rtnl_link_failed:
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+xfrm_tunnel_failed:
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
--
2.7.4
On Thu, Jan 03, 2019 at 07:48:41AM -0500, Su Yanjun wrote:
> Recently we run a network test over ipcomp virtual tunnel.We find that
> if a ipv4 packet needs fragment, then the peer can't receive
> it.
>
> We deep into the code and find that when packet need fragment the smaller
> fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
> goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
> always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
> when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
> error.
Why not just leaving rp_filter disabled or in 'loose mode' if you use ipcomp?
>
> This patch adds compatible support for the ipip process in ipcomp virtual tunnel.
>
> Signed-off-by: Su Yanjun <[email protected]>
> ---
> net/ipv4/ip_vti.c | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
> index de31b30..63de2f6 100644
> --- a/net/ipv4/ip_vti.c
> +++ b/net/ipv4/ip_vti.c
> @@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
>
> XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
>
> + if (iph->protocol == IPPROTO_IPIP)
> + skb->dev = tunnel->dev;
> +
> return xfrm_input(skb, nexthdr, spi, encap_type);
> }
>
> @@ -76,10 +79,15 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
>
> static int vti_rcv(struct sk_buff *skb)
> {
> + __be32 spi = 0;
> +
> XFRM_SPI_SKB_CB(skb)->family = AF_INET;
> XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> +
> + if (ip_hdr(skb)->protocol == IPPROTO_IPIP)
> + spi = ip_hdr(skb)->saddr;
>
> - return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
> + return vti_input(skb, ip_hdr(skb)->protocol, spi, 0);
You use the src address as spi, how is this supposed to work?
On 1/4/2019 3:43 PM, Steffen Klassert wrote:
> On Thu, Jan 03, 2019 at 07:48:41AM -0500, Su Yanjun wrote:
>> Recently we run a network test over ipcomp virtual tunnel.We find that
>> if a ipv4 packet needs fragment, then the peer can't receive
>> it.
>>
>> We deep into the code and find that when packet need fragment the smaller
>> fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
>> goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
>> always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
>> when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
>> error.
> Why not just leaving rp_filter disabled or in 'loose mode' if you use ipcomp?
In my option rp_filter should not affect the ip_vti functionality.
The root cause is the origin tunnel code doesn't update skb->dev.
vti only cares about ipcomp, esp, ah packets.
>> This patch adds compatible support for the ipip process in ipcomp virtual tunnel.
>>
>> Signed-off-by: Su Yanjun <[email protected]>
>> ---
>> net/ipv4/ip_vti.c | 25 ++++++++++++++++++++++++-
>> 1 file changed, 24 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
>> index de31b30..63de2f6 100644
>> --- a/net/ipv4/ip_vti.c
>> +++ b/net/ipv4/ip_vti.c
>> @@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
>>
>> XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
>>
>> + if (iph->protocol == IPPROTO_IPIP)
>> + skb->dev = tunnel->dev;
>> +
>> return xfrm_input(skb, nexthdr, spi, encap_type);
>> }
>>
>> @@ -76,10 +79,15 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
>>
>> static int vti_rcv(struct sk_buff *skb)
>> {
>> + __be32 spi = 0;
>> +
>> XFRM_SPI_SKB_CB(skb)->family = AF_INET;
>> XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
>> +
>> + if (ip_hdr(skb)->protocol == IPPROTO_IPIP)
>> + spi = ip_hdr(skb)->saddr;
>>
>> - return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
>> + return vti_input(skb, ip_hdr(skb)->protocol, spi, 0);
> You use the src address as spi, how is this supposed to work?
>
This code derives from xfrm4_tunnel and i just want the vti can handle
ipip packet as xfrm4 tunnel does.
Thanks,
Su
On Fri, Jan 04, 2019 at 04:42:05PM +0800, Su Yanjun <[email protected]> wrote:
> On 1/4/2019 3:43 PM, Steffen Klassert wrote:
>
> > On Thu, Jan 03, 2019 at 07:48:41AM -0500, Su Yanjun wrote:
> > > Recently we run a network test over ipcomp virtual tunnel.We find that
> > > if a ipv4 packet needs fragment, then the peer can't receive
> > > it.
> > >
> > > We deep into the code and find that when packet need fragment the smaller
> > > fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
> > > goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
> > > always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
> > > when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
> > > error.
> > Why not just leaving rp_filter disabled or in 'loose mode' if you use ipcomp?
> In my option rp_filter should not affect the ip_vti functionality.
> The root cause is the origin tunnel code doesn't update skb->dev.
> vti only cares about ipcomp, esp, ah packets.
> > > This patch adds compatible support for the ipip process in ipcomp virtual tunnel.
> > >
> > > Signed-off-by: Su Yanjun <[email protected]>
> > > ---
> > > net/ipv4/ip_vti.c | 25 ++++++++++++++++++++++++-
> > > 1 file changed, 24 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
> > > index de31b30..63de2f6 100644
> > > --- a/net/ipv4/ip_vti.c
> > > +++ b/net/ipv4/ip_vti.c
> > > @@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
> > > XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
> > > + if (iph->protocol == IPPROTO_IPIP)
> > > + skb->dev = tunnel->dev;
> > > +
> > > return xfrm_input(skb, nexthdr, spi, encap_type);
> > > }
> > > @@ -76,10 +79,15 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
> > > static int vti_rcv(struct sk_buff *skb)
> > > {
> > > + __be32 spi = 0;
> > > +
> > > XFRM_SPI_SKB_CB(skb)->family = AF_INET;
> > > XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> > > +
> > > + if (ip_hdr(skb)->protocol == IPPROTO_IPIP)
> > > + spi = ip_hdr(skb)->saddr;
> > > - return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
> > > + return vti_input(skb, ip_hdr(skb)->protocol, spi, 0);
> > You use the src address as spi, how is this supposed to work?
> >
> This code derives from xfrm4_tunnel and i just want the vti can handle ipip
> packet as xfrm4 tunnel does.
Ok, I see what it is. ipcomp needs an extra SA to handle the 'not
compressed' packets. This SA uses the src address as the spi.
So this is ok, but please add separate handlers for this case.