When tunneling IP traffic with GRE this patch makes it possible to export the ToS/DiffServ information to the outer IP header.
This is particularly useful in a scenario with ESP/AH where the inner IP header is encrypted but the packet priority/DiffServ information
should still be respected by the transporting routers (for example in an MPLS backbone network).
The feature is disabled by default and can be enabled on a per-interface basis (/proc/sys/net/ipv4/conf/ethX/gre_copy_tos).
This also brings Linux back in the game, as JunOS/IOS have provided this for quite some time:
http://www.cisco.com/en/US/docs/ios/11_3/feature/guide/greqos.html
http://www.juniper.net/techpubs/software/junos/junos94/swconfig-services/configuring-a-gre-tunnel-to-copy-tos-bits-to-the-outer-ip-header.html
Signed-off-by: Andreas Jaggi <[email protected]>
diff -urN vanilla-linux-2.6.29.4/include/linux/sysctl.h gre-copy-tos/include/linux/sysctl.h
--- vanilla-linux-2.6.29.4/include/linux/sysctl.h 2009-05-19 01:52:34.000000000 +0200
+++ gre-copy-tos/include/linux/sysctl.h 2009-06-29 14:23:07.000000000 +0200
@@ -490,6 +490,7 @@
NET_IPV4_CONF_ARP_IGNORE=19,
NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
NET_IPV4_CONF_ARP_ACCEPT=21,
+ NET_IPV4_CONF_GRE_COPY_TOS=22,
__NET_IPV4_CONF_MAX
};
diff -urN vanilla-linux-2.6.29.4/kernel/sysctl_check.c gre-copy-tos/kernel/sysctl_check.c
--- vanilla-linux-2.6.29.4/kernel/sysctl_check.c 2009-05-19 01:52:34.000000000 +0200
+++ gre-copy-tos/kernel/sysctl_check.c 2009-06-29 14:23:07.000000000 +0200
@@ -219,6 +219,7 @@
{ NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
{ NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
{ NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
+ { NET_IPV4_CONF_GRE_COPY_TOS, "gre_copy_tos" },
{}
};
diff -urN vanilla-linux-2.6.29.4/net/ipv4/devinet.c gre-copy-tos/net/ipv4/devinet.c
--- vanilla-linux-2.6.29.4/net/ipv4/devinet.c 2009-05-19 01:52:34.000000000 +0200
+++ gre-copy-tos/net/ipv4/devinet.c 2009-06-29 14:23:07.000000000 +0200
@@ -1439,6 +1439,7 @@
DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+ DEVINET_SYSCTL_RW_ENTRY(GRE_COPY_TOS, "gre_copy_tos"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff -urN vanilla-linux-2.6.29.4/net/ipv4/ip_gre.c gre-copy-tos/net/ipv4/ip_gre.c
--- vanilla-linux-2.6.29.4/net/ipv4/ip_gre.c 2009-05-19 01:52:34.000000000 +0200
+++ gre-copy-tos/net/ipv4/ip_gre.c 2009-06-29 14:23:24.000000000 +0200
@@ -610,6 +610,7 @@
struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
+ struct in_device *in_dev;
u8 tos;
__be16 df;
struct rtable *rt; /* Route to the other host */
@@ -677,11 +678,13 @@
}
tos = tiph->tos;
- if (tos&1) {
+ in_dev = in_dev_get(dev);
+ if ((in_dev && IN_DEV_ORCONF(in_dev, GRE_COPY_TOS)) || tos&1) {
if (skb->protocol == htons(ETH_P_IP))
tos = old_iph->tos;
tos &= ~1;
}
+ in_dev_put(in_dev);
{
struct flowi fl = { .oif = tunnel->parms.link,
Andreas Jaggi wrote:
> When tunneling IP traffic with GRE this patch makes it possible to
> export the ToS/DiffServ information to the outer IP header.
> This is particularly useful in a scenario with ESP/AH where the inner IP
> header is encrypted but the packet priority/DiffServ information
> should still be respected by the transporting routers (for example in an
> MPLS backbone network).
>
> The feature is disabled by default and can be enabled on a per-interface
> basis (/proc/sys/net/ipv4/conf/ethX/gre_copy_tos).
>
> Also does this bring Linux back in the game, as JunOS/IOS provide this
> for quite some time:
> http://www.cisco.com/en/US/docs/ios/11_3/feature/guide/greqos.html
> http://www.juniper.net/techpubs/software/junos/junos94/swconfig-services/configuring-a-gre-tunnel-to-copy-tos-bits-to-the-outer-ip-header.html
>
>
> Signed-off-by: Andreas Jaggi <[email protected]>
>
> diff -urN vanilla-linux-2.6.29.4/include/linux/sysctl.h
> gre-copy-tos/include/linux/sysctl.h
> --- vanilla-linux-2.6.29.4/include/linux/sysctl.h 2009-05-19
> 01:52:34.000000000 +0200
> +++ gre-copy-tos/include/linux/sysctl.h 2009-06-29 14:23:07.000000000
> +0200
> @@ -490,6 +490,7 @@
> NET_IPV4_CONF_ARP_IGNORE=19,
> NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
> NET_IPV4_CONF_ARP_ACCEPT=21,
> + NET_IPV4_CONF_GRE_COPY_TOS=22,
Please add this to the rtnl_link interface instead of using
sysctls.
Now using the rtnl_link interface.
>> When tunneling IP traffic with GRE this patch makes it possible to
>> export the ToS/DiffServ information to the outer IP header.
>> This is particularly useful in a scenario with ESP/AH where the inner
>> IP header is encrypted but the packet priority/DiffServ information
>> should still be respected by the transporting routers (for example in
>> an MPLS backbone network).
>>
>> The feature is disabled by default and can be enabled on a
>> per-interface basis.
>>
>> Also does this bring Linux back in the game, as JunOS/IOS provide this
>> for quite some time:
>> http://www.cisco.com/en/US/docs/ios/11_3/feature/guide/greqos.html
>> http://www.juniper.net/techpubs/software/junos/junos94/swconfig-services/configuring-a-gre-tunnel-to-copy-tos-bits-to-the-outer-ip-header.html
diff -urN vanilla-linux-2.6.29.4/include/linux/if_tunnel.h dev-gre/include/linux/if_tunnel.h
--- vanilla-linux-2.6.29.4/include/linux/if_tunnel.h 2009-05-19 01:52:34.000000000 +0200
+++ dev-gre/include/linux/if_tunnel.h 2009-06-30 08:22:52.000000000 +0200
@@ -34,6 +34,7 @@
__be32 i_key;
__be32 o_key;
struct iphdr iph;
+ __u8 copy_tos;
};
/* SIT-mode i_flags */
@@ -63,6 +64,7 @@
IFLA_GRE_REMOTE,
IFLA_GRE_TTL,
IFLA_GRE_TOS,
+ IFLA_GRE_COPY_TOS,
IFLA_GRE_PMTUDISC,
__IFLA_GRE_MAX,
};
diff -urN vanilla-linux-2.6.29.4/net/ipv4/ip_gre.c dev-gre/net/ipv4/ip_gre.c
--- vanilla-linux-2.6.29.4/net/ipv4/ip_gre.c 2009-05-19 01:52:34.000000000 +0200
+++ dev-gre/net/ipv4/ip_gre.c 2009-06-30 10:48:57.000000000 +0200
@@ -677,7 +677,7 @@
}
tos = tiph->tos;
- if (tos&1) {
+ if (tunnel->parms.copy_tos || tos&1) {
if (skb->protocol == htons(ETH_P_IP))
tos = old_iph->tos;
tos &= ~1;
@@ -991,6 +991,7 @@
t->parms.iph.ttl = p.iph.ttl;
t->parms.iph.tos = p.iph.tos;
t->parms.iph.frag_off = p.iph.frag_off;
+ t->parms.copy_tos = p.copy_tos;
if (t->parms.link != p.link) {
t->parms.link = p.link;
dev->mtu = ipgre_tunnel_bind_dev(dev);
@@ -1383,6 +1384,9 @@
if (data[IFLA_GRE_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
+ if (data[IFLA_GRE_COPY_TOS])
+ parms->copy_tos = nla_get_u8(data[IFLA_GRE_COPY_TOS]);
+
if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
}
@@ -1536,6 +1540,8 @@
nla_total_size(1) +
/* IFLA_GRE_TOS */
nla_total_size(1) +
+ /* IFLA_GRE_COPY_TOS */
+ nla_total_size(1) +
/* IFLA_GRE_PMTUDISC */
nla_total_size(1) +
0;
@@ -1555,6 +1561,7 @@
NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
+ NLA_PUT_U8(skb, IFLA_GRE_COPY_TOS, p->copy_tos);
NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
return 0;
@@ -1573,6 +1580,7 @@
[IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
+ [IFLA_GRE_COPY_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
};
And the corresponding patch for iproute2.
>> When tunneling IP traffic with GRE this patch makes it possible to
>> export the ToS/DiffServ information to the outer IP header.
>> This is particularly useful in a scenario with ESP/AH where the inner
>> IP header is encrypted but the packet priority/DiffServ information
>> should still be respected by the transporting routers (for example in
>> an MPLS backbone network).
>>
>> The feature is disabled by default and can be enabled on a
>> per-interface basis (/proc/sys/net/ipv4/conf/ethX/gre_copy_tos).
>>
>> Also does this bring Linux back in the game, as JunOS/IOS provide this
>> for quite some time:
>> http://www.cisco.com/en/US/docs/ios/11_3/feature/guide/greqos.html
>> http://www.juniper.net/techpubs/software/junos/junos94/swconfig-services/configuring-a-gre-tunnel-to-copy-tos-bits-to-the-outer-ip-header.html
diff -urN iproute2-2.6.29-1/include/linux/if_tunnel.h iproute2-2.6.29-1-gre-dev/include/linux/if_tunnel.h
--- iproute2-2.6.29-1/include/linux/if_tunnel.h 2009-03-24 23:40:54.000000000 +0100
+++ iproute2-2.6.29-1-gre-dev/include/linux/if_tunnel.h 2009-06-30 08:39:18.000000000 +0200
@@ -31,6 +31,7 @@
__be32 i_key;
__be32 o_key;
struct iphdr iph;
+ __u8 copy_tos;
};
/* SIT-mode i_flags */
@@ -60,6 +61,7 @@
IFLA_GRE_REMOTE,
IFLA_GRE_TTL,
IFLA_GRE_TOS,
+ IFLA_GRE_COPY_TOS,
IFLA_GRE_PMTUDISC,
__IFLA_GRE_MAX,
};
diff -urN iproute2-2.6.29-1/ip/iptunnel.c iproute2-2.6.29-1-gre-dev/ip/iptunnel.c
--- iproute2-2.6.29-1/ip/iptunnel.c 2009-03-24 23:40:54.000000000 +0100
+++ iproute2-2.6.29-1-gre-dev/ip/iptunnel.c 2009-06-30 10:31:19.000000000 +0200
@@ -41,7 +41,7 @@
fprintf(stderr, "Usage: ip tunnel { add | change | del | show } [ NAME ]\n");
fprintf(stderr, " [ mode { ipip | gre | sit | isatap } ] [ remote ADDR ] [ local ADDR ]\n");
fprintf(stderr, " [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n");
- fprintf(stderr, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n");
+ fprintf(stderr, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ [no]copytos ] [ dev PHYS_DEV ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where: NAME := STRING\n");
fprintf(stderr, " ADDR := { IP_ADDRESS | any }\n");
@@ -160,6 +160,10 @@
p->iph.frag_off = 0;
} else if (strcmp(*argv, "pmtudisc") == 0) {
p->iph.frag_off = htons(IP_DF);
+ } else if (strcmp(*argv, "nocopytos") == 0) {
+ p->copy_tos = 0;
+ } else if (strcmp(*argv, "copytos") == 0) {
+ p->copy_tos = 1;
} else if (strcmp(*argv, "remote") == 0) {
NEXT_ARG();
if (strcmp(*argv, "any"))
@@ -353,6 +357,9 @@
if (!(p->iph.frag_off&htons(IP_DF)))
printf(" nopmtudisc");
+ if (p->copy_tos)
+ printf(" copytos");
+
if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key)
printf(" key %s", s3);
else if ((p->i_flags|p->o_flags)&GRE_KEY) {
diff -urN iproute2-2.6.29-1/ip/link_gre.c iproute2-2.6.29-1-gre-dev/ip/link_gre.c
--- iproute2-2.6.29-1/ip/link_gre.c 2009-03-24 23:40:54.000000000 +0100
+++ iproute2-2.6.29-1-gre-dev/ip/link_gre.c 2009-06-30 10:31:48.000000000 +0200
@@ -29,7 +29,7 @@
fprintf(stderr, "Usage: ip link { add | set | change | replace | del } NAME\n");
fprintf(stderr, " type { gre | gretap } [ remote ADDR ] [ local ADDR ]\n");
fprintf(stderr, " [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n");
- fprintf(stderr, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n");
+ fprintf(stderr, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ [no]copytos ] [ dev PHYS_DEV ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where: NAME := STRING\n");
fprintf(stderr, " ADDR := { IP_ADDRESS | any }\n");
@@ -61,6 +61,7 @@
__u8 pmtudisc = 1;
__u8 ttl = 0;
__u8 tos = 0;
+ __u8 copy_tos = 0;
int len;
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
@@ -125,6 +126,9 @@
if (greinfo[IFLA_GRE_TOS])
tos = *(__u8 *)RTA_DATA(greinfo[IFLA_GRE_TOS]);
+ if (greinfo[IFLA_GRE_COPY_TOS])
+ copy_tos = *(__u8 *)RTA_DATA(greinfo[IFLA_GRE_COPY_TOS]);
+
if (greinfo[IFLA_GRE_LINK])
link = *(__u8 *)RTA_DATA(greinfo[IFLA_GRE_LINK]);
}
@@ -196,6 +200,10 @@
pmtudisc = 0;
} else if (!matches(*argv, "pmtudisc")) {
pmtudisc = 1;
+ } else if (!matches(*argv, "nocopytos")) {
+ copy_tos = 0;
+ } else if (!matches(*argv, "copytos")) {
+ copy_tos = 1;
} else if (!matches(*argv, "remote")) {
NEXT_ARG();
if (strcmp(*argv, "any"))
@@ -262,6 +270,7 @@
addattr32(n, 1024, IFLA_GRE_LINK, link);
addattr_l(n, 1024, IFLA_GRE_TTL, &ttl, 1);
addattr_l(n, 1024, IFLA_GRE_TOS, &tos, 1);
+ addattr_l(n, 1024, IFLA_GRE_COPY_TOS, &copy_tos, 1);
return 0;
}
@@ -325,6 +334,10 @@
!*(__u8 *)RTA_DATA(tb[IFLA_GRE_PMTUDISC]))
fputs("nopmtudisc ", f);
+ if (tb[IFLA_GRE_COPY_TOS] &&
+ *(__u8 *)RTA_DATA(tb[IFLA_GRE_COPY_TOS]))
+ fputs("copytos ", f);
+
if (tb[IFLA_GRE_IFLAGS])
iflags = *(__u16 *)RTA_DATA(tb[IFLA_GRE_IFLAGS]);
Andreas Jaggi wrote:
> Now using the rtnl_link interface.
Your patches are seriously whitespace-damaged. Please make sure
to send them properly (test by sending to yourself first).
> diff -urN vanilla-linux-2.6.29.4/include/linux/if_tunnel.h
> dev-gre/include/linux/if_tunnel.h
> --- vanilla-linux-2.6.29.4/include/linux/if_tunnel.h 2009-05-19
> 01:52:34.000000000 +0200
> +++ dev-gre/include/linux/if_tunnel.h 2009-06-30 08:22:52.000000000
> +0200
> @@ -34,6 +34,7 @@
> __be32 i_key;
> __be32 o_key;
> struct iphdr iph;
> + __u8 copy_tos;
You can't change this structure; doing so will break compatibility.
> };
>
> /* SIT-mode i_flags */
> @@ -63,6 +64,7 @@
> IFLA_GRE_REMOTE,
> IFLA_GRE_TTL,
> IFLA_GRE_TOS,
> + IFLA_GRE_COPY_TOS,
> IFLA_GRE_PMTUDISC,
> __IFLA_GRE_MAX,
You can't add new attributes in the middle; they need to be added at
the end.