2017-09-22 13:49:06

by Haishuang Yan

[permalink] [raw]
Subject: [PATCH v4 1/3] ipv4: Namespaceify tcp_fastopen knob

Different namespace application might require enable TCP Fast Open
feature independently of the host.

This patch series continues making more of the TCP Fast Open related
sysctl knobs be per net-namespace.

Reported-by: Luca BRUNO <[email protected]>
Signed-off-by: Haishuang Yan <[email protected]>

---
Change since v4:
* Fix potential leak issue
* Fix typo error
* Split the patches to be per-sysctl
* Remove unrelated change by mistake
---
include/net/netns/ipv4.h | 1 +
include/net/tcp.h | 1 -
net/ipv4/af_inet.c | 7 ++++---
net/ipv4/sysctl_net_ipv4.c | 14 +++++++-------
net/ipv4/tcp.c | 4 ++--
net/ipv4/tcp_fastopen.c | 11 +++++------
net/ipv4/tcp_ipv4.c | 2 ++
7 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 20d061c..ce6dde0 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -127,6 +127,7 @@ struct netns_ipv4 {
int sysctl_tcp_timestamps;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
+ int sysctl_tcp_fastopen;

#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f28..f628967 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@


/* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e..ddd126d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
- int err;
+ int err, tcp_fastopen;

lock_sock(sk);

@@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
- (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+ (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
tcp_fastopen_init_key_once(true);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d3c038..e31e853c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -401,13 +401,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen",
- .data = &sysctl_tcp_fastopen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "tcp_fastopen_key",
.mode = 0600,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
@@ -1085,6 +1078,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_fastopen",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402..dac56c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1126,7 +1126,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;

- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -2759,7 +2759,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3c3322..31b08ec 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -9,8 +9,6 @@
#include <net/inetpeer.h>
#include <net/tcp.h>

-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;

static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
@@ -279,21 +277,22 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc)
{
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+ int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;

if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);

- if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
return NULL;
}

- if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+ if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;

if (foc->len >= 0 && /* Client presents or requests a cookie */
@@ -347,7 +346,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
return false;
}

- if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
cookie->len = -1;
return true;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d9416b5..88409b1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2472,6 +2472,8 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;

+ net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+
return 0;
fail:
tcp_sk_exit(net);
--
1.8.3.1




2017-09-22 13:49:13

by Haishuang Yan

[permalink] [raw]
Subject: [PATCH v4 3/3] ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob

Different namespace application might require different time period in
second to disable Fastopen on active TCP sockets.

Tested:
Simulate following similar situation that the server's data gets dropped
after 3WHS.
C ---- syn-data ---> S
C <--- syn/ack ----- S
C ---- ack --------> S
S (accept & write)
C? X <- data ------ S
[retry and timeout]

And then print netstat of TCPFastOpenBlackhole, the counter increased as
expected when the firewall blackhole issue is detected and active TFO is
disabled.
# cat /proc/net/netstat | awk '{print $91}'
TCPFastOpenBlackhole
1

Signed-off-by: Haishuang Yan <[email protected]>
---
include/net/netns/ipv4.h | 3 +++
net/ipv4/sysctl_net_ipv4.c | 20 +++++++++++---------
net/ipv4/tcp_fastopen.c | 30 +++++++++++-------------------
net/ipv4/tcp_ipv4.c | 2 ++
4 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 66b8335..d76edde 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,9 @@ struct netns_ipv4 {
int sysctl_tcp_fastopen;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
spinlock_t tcp_fastopen_ctx_lock;
+ unsigned int sysctl_tcp_fastopen_blackhole_timeout;
+ atomic_t tfo_active_disable_times;
+ unsigned long tfo_active_disable_stamp;

#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 20e19fe..cac8dd3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;

ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);

return ret;
}
@@ -398,14 +400,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen_blackhole_timeout_sec",
- .data = &sysctl_tcp_fastopen_blackhole_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
- },
- {
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1083,6 +1077,14 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
.proc_handler = proc_tcp_fastopen_key,
},
+ {
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4eae44a..de470e7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -422,25 +422,16 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
* TFO connection with data exchanges.
*/

-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
/* Disable active TFO and record current jiffies and
* tfo_active_disable_times
*/
void tcp_fastopen_active_disable(struct sock *sk)
{
- atomic_inc(&tfo_active_disable_times);
- tfo_active_disable_stamp = jiffies;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+ struct net *net = sock_net(sk);

-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
- atomic_set(&tfo_active_disable_times, 0);
+ atomic_inc(&net->ipv4.tfo_active_disable_times);
+ net->ipv4.tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}

/* Calculate timeout for tfo active disable
@@ -449,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- int tfo_da_times = atomic_read(&tfo_active_disable_times);
- int multiplier;
+ unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int multiplier;

if (!tfo_da_times)
return false;

/* Limit timout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
- if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ timeout = multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
return true;

/* Mark check bit so we can check for successful active TFO
@@ -495,10 +487,10 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
}
}
} else if (tp->syn_fastopen_ch &&
- atomic_read(&tfo_active_disable_times)) {
+ atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
dst = sk_dst_get(sk);
if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
dst_release(dst);
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 49c74c0..ad3b5bb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2474,6 +2474,8 @@ static int __net_init tcp_sk_init(struct net *net)

net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);

return 0;
fail:
--
1.8.3.1



2017-09-22 13:49:46

by Haishuang Yan

[permalink] [raw]
Subject: [PATCH v4 2/3] ipv4: Namespaceify tcp_fastopen_key knob

Different namespace application might require different tcp_fastopen_key
independently of the host.

David Miller pointed out there is a leak without releasing the context
of tcp_fastopen_key during netns teardown. So add the release action in
exit_batch path.

Tested:
1. Container namespace:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
2817fff2-f803cf97-eadfd1f3-78c0992b

cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: 1e5dd82a8c492ca9

2. Host:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
107d7c5f-68eb2ac7-02fb06e6-ed341702

cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: e213c02bf0afbc8a

Signed-off-by: Haishuang Yan <[email protected]>
---
include/net/netns/ipv4.h | 4 +++
include/net/tcp.h | 6 ++---
net/ipv4/af_inet.c | 2 +-
net/ipv4/sysctl_net_ipv4.c | 26 +++++++++----------
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_fastopen.c | 64 +++++++++++++++++++++++++++++++---------------
net/ipv4/tcp_ipv4.c | 6 +++++
7 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ce6dde0..66b8335 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -36,6 +36,8 @@ struct inet_timewait_death_row {
int sysctl_max_tw_buckets;
};

+struct tcp_fastopen_context;
+
struct netns_ipv4 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -128,6 +130,8 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
+ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+ spinlock_t tcp_fastopen_ctx_lock;

#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f628967..e27bd18 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1556,13 +1556,13 @@ struct tcp_fastopen_request {
};
void tcp_free_fastopen_req(struct tcp_sock *tp);

-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_ctx_destroy(struct net *net);
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(bool publish);
+void tcp_fastopen_init_key_once(struct net *net);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ddd126d..43a1bbe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(sock_net(sk));
}

err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e31e853c..20e19fe 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}

-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
return -ENOMEM;

rcu_read_lock();
- ctxt = rcu_dereference(tcp_fastopen_ctx);
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -282,12 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
- /* Generate a dummy secret but don't publish it. This
- * is needed so we don't regenerate a new key on the
- * first invocation of tcp_fastopen_cookie_gen
- */
- tcp_fastopen_init_key_once(false);
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);
}

bad_key:
@@ -401,12 +398,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen_key",
- .mode = 0600,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
- .proc_handler = proc_tcp_fastopen_key,
- },
- {
.procname = "tcp_fastopen_blackhole_timeout_sec",
.data = &sysctl_tcp_fastopen_blackhole_timeout,
.maxlen = sizeof(int),
@@ -1085,6 +1076,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "tcp_fastopen_key",
+ .mode = 0600,
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ .proc_handler = proc_tcp_fastopen_key,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dac56c4..23225c9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(net);

fastopen_queue_tune(sk, val);
} else {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 31b08ec..4eae44a 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -9,13 +9,18 @@
#include <net/inetpeer.h>
#include <net/tcp.h>

-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(struct net *net)
{
- static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctxt;
+
+ rcu_read_lock();
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctxt) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();

/* tcp_fastopen_reset_cipher publishes the new context
* atomically, so we allow this race happening here.
@@ -23,8 +28,8 @@ void tcp_fastopen_init_key_once(bool publish)
* All call sites of tcp_fastopen_cookie_gen also check
* for a valid cookie, so this is an acceptable risk.
*/
- if (net_get_random_once(key, sizeof(key)) && publish)
- tcp_fastopen_reset_cipher(key, sizeof(key));
+ get_random_bytes(key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, key, sizeof(key));
}

static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -35,7 +40,22 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
kfree(ctx);
}

-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+ struct tcp_fastopen_context *ctxt;
+
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ if (ctxt)
+ call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
+
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len)
{
int err;
struct tcp_fastopen_context *ctx, *octx;
@@ -59,26 +79,27 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len)
}
memcpy(ctx->key, key, len);

- spin_lock(&tcp_fastopen_ctx_lock);
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);

- octx = rcu_dereference_protected(tcp_fastopen_ctx,
- lockdep_is_held(&tcp_fastopen_ctx_lock));
- rcu_assign_pointer(tcp_fastopen_ctx, ctx);
- spin_unlock(&tcp_fastopen_ctx_lock);
+ octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);

if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
return err;
}

-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct net *net,
+ const void *path,
struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_context *ctx;
bool ok = false;

rcu_read_lock();
- ctx = rcu_dereference(tcp_fastopen_ctx);
+ ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctx) {
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -94,7 +115,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct net *net,
+ struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
@@ -102,7 +124,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct iphdr *iph = ip_hdr(syn);

__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(path, foc);
+ return __tcp_fastopen_cookie_gen(net, path, foc);
}

#if IS_ENABLED(CONFIG_IPV6)
@@ -110,13 +132,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;

- if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+ if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;

for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(buf, foc);
+ return __tcp_fastopen_cookie_gen(net, buf, foc);
}
}
#endif
@@ -296,7 +318,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
goto fastopen;

if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+ tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) &&
foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
foc->len == valid_foc.len &&
!memcmp(foc->val, valid_foc.val, foc->len)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 88409b1..49c74c0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2473,6 +2473,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_timestamps = 1;

net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+ spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);

return 0;
fail:
@@ -2483,7 +2484,12 @@ static int __net_init tcp_sk_init(struct net *net)

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
+ struct net *net;
+
inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ tcp_fastopen_ctx_destroy(net);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
--
1.8.3.1



2017-09-25 23:24:52

by David Miller

[permalink] [raw]
Subject: Re: [PATCH v4 2/3] ipv4: Namespaceify tcp_fastopen_key knob

From: Haishuang Yan <[email protected]>
Date: Fri, 22 Sep 2017 21:48:43 +0800

> @@ -9,13 +9,18 @@
> #include <net/inetpeer.h>
> #include <net/tcp.h>
>
> -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
> -
> -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
> -
> -void tcp_fastopen_init_key_once(bool publish)
> +void tcp_fastopen_init_key_once(struct net *net)

Why did you remove the 'publish' logic from this function?

2017-09-26 01:26:02

by Haishuang Yan

[permalink] [raw]
Subject: Re: [PATCH v4 2/3] ipv4: Namespaceify tcp_fastopen_key knob



> On 2017??9??26??, at ????7:24, David Miller <[email protected]> wrote:
>
> From: Haishuang Yan <[email protected]>
> Date: Fri, 22 Sep 2017 21:48:43 +0800
>
>> @@ -9,13 +9,18 @@
>> #include <net/inetpeer.h>
>> #include <net/tcp.h>
>>
>> -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
>> -
>> -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
>> -
>> -void tcp_fastopen_init_key_once(bool publish)
>> +void tcp_fastopen_init_key_once(struct net *net)
>
> Why did you remove the 'publish' logic from this function?
>

I think this logic is not necessary now, in proc_tcp_fastopen_key, I have removed
tcp_fastopen_init_key_once(false) where the ??publish?? is false:

- /* Generate a dummy secret but don't publish it. This
- * is needed so we don't regenerate a new key on the
- * first invocation of tcp_fastopen_cookie_gen
- */
- tcp_fastopen_init_key_once(false);
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);

It said we don't regenerate a new key on first invocation of tcp_fastopen_cookie_gen,
but in tcp_fastopen_cookie_gen??it didn??t call tcp_fastopen_init_key_once since
from commit dfea2aa654243 (tcp: Do not call tcp_fastopen_reset_cipher from interrupt context)??

And in other places where call tcp_fastopen_init_key_once, the ??publish?? is always true:

--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(sock_net(sk));
}

--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(net);

fastopen_queue_tune(sk, val);
} else {


So I deleted ??publish?? logic to ensure it was always true.








2017-09-26 18:18:54

by David Miller

[permalink] [raw]
Subject: Re: [PATCH v4 2/3] ipv4: Namespaceify tcp_fastopen_key knob

From: 严海双 <[email protected]>
Date: Tue, 26 Sep 2017 09:25:51 +0800

>> On 2017年9月26日, at 上午7:24, David Miller <[email protected]> wrote:
>>
>> From: Haishuang Yan <[email protected]>
>> Date: Fri, 22 Sep 2017 21:48:43 +0800
>>
>>> @@ -9,13 +9,18 @@
>>> #include <net/inetpeer.h>
>>> #include <net/tcp.h>
>>>
>>> -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
>>> -
>>> -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
>>> -
>>> -void tcp_fastopen_init_key_once(bool publish)
>>> +void tcp_fastopen_init_key_once(struct net *net)
>>
>> Why did you remove the 'publish' logic from this function?
>>
>
> I think this logic is not necessary now, in proc_tcp_fastopen_key, I have removed
> tcp_fastopen_init_key_once(false) where the ‘publish’ is false:
>
> - /* Generate a dummy secret but don't publish it. This
> - * is needed so we don't regenerate a new key on the
> - * first invocation of tcp_fastopen_cookie_gen
> - */
> - tcp_fastopen_init_key_once(false);
> - tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
> + tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);
>
> It said we don't regenerate a new key on first invocation of tcp_fastopen_cookie_gen,
> but in tcp_fastopen_cookie_gen,it didn’t call tcp_fastopen_init_key_once since
> from commit dfea2aa654243 (tcp: Do not call tcp_fastopen_reset_cipher from interrupt context):
>
> And in other places where call tcp_fastopen_init_key_once, the ‘publish’ is always true:

Ok, this simplification seems legitimate.

But it is unrelated to this namespacification. So it should be in a separate patch,
and should be documented well in the commit message using the great explanation you
gave to me above.

Please respin this series, with this patch #2 split up into two changes.

Thank you.

2017-09-27 01:06:03

by Haishuang Yan

[permalink] [raw]
Subject: Re: [PATCH v4 2/3] ipv4: Namespaceify tcp_fastopen_key knob



> On 2017??9??27??, at ????2:18, David Miller <[email protected]> wrote:
>
> From: ?Ϻ?˫ <[email protected]>
> Date: Tue, 26 Sep 2017 09:25:51 +0800
>
>>> On 2017??9??26??, at ????7:24, David Miller <[email protected]> wrote:
>>>
>>> From: Haishuang Yan <[email protected]>
>>> Date: Fri, 22 Sep 2017 21:48:43 +0800
>>>
>>>> @@ -9,13 +9,18 @@
>>>> #include <net/inetpeer.h>
>>>> #include <net/tcp.h>
>>>>
>>>> -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
>>>> -
>>>> -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
>>>> -
>>>> -void tcp_fastopen_init_key_once(bool publish)
>>>> +void tcp_fastopen_init_key_once(struct net *net)
>>>
>>> Why did you remove the 'publish' logic from this function?
>>>
>>
>> I think this logic is not necessary now, in proc_tcp_fastopen_key, I have removed
>> tcp_fastopen_init_key_once(false) where the ??publish?? is false:
>>
>> - /* Generate a dummy secret but don't publish it. This
>> - * is needed so we don't regenerate a new key on the
>> - * first invocation of tcp_fastopen_cookie_gen
>> - */
>> - tcp_fastopen_init_key_once(false);
>> - tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
>> + tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);
>>
>> It said we don't regenerate a new key on first invocation of tcp_fastopen_cookie_gen,
>> but in tcp_fastopen_cookie_gen??it didn??t call tcp_fastopen_init_key_once since
>> from commit dfea2aa654243 (tcp: Do not call tcp_fastopen_reset_cipher from interrupt context)??
>>
>> And in other places where call tcp_fastopen_init_key_once, the ??publish?? is always true:
>
> Ok, this simplification seems legitimate.
>
> But it is unrelated to this namespacification. So it should be in a separate patch,
> and should be documented well in the commit message using the great explanation you
> gave to me above.
>
> Please respin this series, with this patch #2 split up into two changes.
>
> Thank you.

Okay?? thanks David for advise. I will split the patch #2 in next commit.