Currently, userspace is able to initiate costly high-order allocations in
the kernel by sending large broadcast netlink messages, which is considered
undesirable. At the same time, unicast messages are safe in this regard,
because they use vmalloc-ed memory.
This series introduces changes that allow broadcast messages to be
allocated with vmalloc() as well, just like unicast messages.
Jan Dakinevich (3):
skbuff: use kvfree() to deallocate head
netlink: always use vmapped memory for skb data
netlink: use generic skb_set_owner_r()
include/linux/netlink.h | 16 ----------------
net/core/skbuff.c | 2 +-
net/ipv4/fib_frontend.c | 2 +-
net/netfilter/nfnetlink.c | 2 +-
net/netlink/af_netlink.c | 39 +++++++--------------------------------
5 files changed, 10 insertions(+), 51 deletions(-)
--
2.1.4
Since the skb destructor is no longer used for data deallocation,
netlink_skb_set_owner_r() almost completely repeats the generic
skb_set_owner_r(). Thus, neither netlink_skb_set_owner_r() nor
netlink_skb_destructor() is required anymore.
Signed-off-by: Jan Dakinevich <[email protected]>
---
net/netlink/af_netlink.c | 23 ++++-------------------
1 file changed, 4 insertions(+), 19 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 04a3457..b0c2eb2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -358,21 +358,6 @@ static void netlink_rcv_wake(struct sock *sk)
wake_up_interruptible(&nlk->wait);
}
-static void netlink_skb_destructor(struct sk_buff *skb)
-{
- if (skb->sk != NULL)
- sock_rfree(skb);
-}
-
-static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
- WARN_ON(skb->sk != NULL);
- skb->sk = sk;
- skb->destructor = netlink_skb_destructor;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
- sk_mem_charge(sk, skb->truesize);
-}
-
static void netlink_sock_destruct(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1225,7 +1210,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
}
return 1;
}
- netlink_skb_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
return 0;
}
@@ -1286,7 +1271,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
- netlink_skb_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
netlink_deliver_tap_kernel(sk, ssk, skb);
nlk->netlink_rcv(skb);
@@ -1367,7 +1352,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
- netlink_skb_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
}
@@ -2227,7 +2212,7 @@ static int netlink_dump(struct sock *sk)
* single netdev. The outcome is MSG_TRUNC error.
*/
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
- netlink_skb_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
cb->extack = &extack;
--
2.1.4
If the skb buffer was allocated using vmalloc(), this makes its
subsequent deallocation simpler.
Signed-off-by: Jan Dakinevich <[email protected]>
---
net/core/skbuff.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0338820..55eac01 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -588,7 +588,7 @@ static void skb_free_head(struct sk_buff *skb)
if (skb->head_frag)
skb_free_frag(head);
else
- kfree(head);
+ kvfree(head);
}
static void skb_release_data(struct sk_buff *skb)
--
2.1.4
Don't make an exception for broadcast skbs; allocate their buffers in
the same way as for unicast skbs.
- this makes the special destructor for freeing memory under ->head
needless,
- ...then, there is no need to reassign this destructor to cloned skbs,
- ...then, netlink_skb_clone() becomes equivalent to the generic
skb_clone() and can be dropped.
Signed-off-by: Jan Dakinevich <[email protected]>
---
include/linux/netlink.h | 16 ----------------
net/ipv4/fib_frontend.c | 2 +-
net/netfilter/nfnetlink.c | 2 +-
net/netlink/af_netlink.c | 16 +++-------------
4 files changed, 5 insertions(+), 31 deletions(-)
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 205fa7b..daacffc 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -146,22 +146,6 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
-static inline struct sk_buff *
-netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
-{
- struct sk_buff *nskb;
-
- nskb = skb_clone(skb, gfp_mask);
- if (!nskb)
- return NULL;
-
- /* This is a large skb, set destructor callback to release head */
- if (is_vmalloc_addr(skb->head))
- nskb->destructor = skb->destructor;
-
- return nskb;
-}
-
/*
* skb should fit one page. This choice is good for headerless malloc.
* But we should limit to 8K so that userspace does not have to
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e8bc939..cbbd75d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1371,7 +1371,7 @@ static void nl_fib_input(struct sk_buff *skb)
nlmsg_len(nlh) < sizeof(*frn))
return;
- skb = netlink_skb_clone(skb, GFP_KERNEL);
+ skb = skb_clone(skb, GFP_KERNEL);
if (!skb)
return;
nlh = nlmsg_hdr(skb);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 4abbb45..6ae22c9c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -311,7 +311,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
replay:
status = 0;
- skb = netlink_skb_clone(oskb, GFP_KERNEL);
+ skb = skb_clone(oskb, GFP_KERNEL);
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM, NULL);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 90b2ab9..04a3457 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -360,13 +360,6 @@ static void netlink_rcv_wake(struct sock *sk)
static void netlink_skb_destructor(struct sk_buff *skb)
{
- if (is_vmalloc_addr(skb->head)) {
- if (!skb->cloned ||
- !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
- vfree(skb->head);
-
- skb->head = NULL;
- }
if (skb->sk != NULL)
sock_rfree(skb);
}
@@ -1164,13 +1157,12 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock;
}
-static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
- int broadcast)
+static struct sk_buff *netlink_alloc_large_skb(unsigned int size)
{
struct sk_buff *skb;
void *data;
- if (size <= NLMSG_GOODSIZE || broadcast)
+ if (size <= NLMSG_GOODSIZE)
return alloc_skb(size, GFP_KERNEL);
size = SKB_DATA_ALIGN(size) +
@@ -1183,8 +1175,6 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
skb = __build_skb(data, size);
if (skb == NULL)
vfree(data);
- else
- skb->destructor = netlink_skb_destructor;
return skb;
}
@@ -1889,7 +1879,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (len > sk->sk_sndbuf - 32)
goto out;
err = -ENOBUFS;
- skb = netlink_alloc_large_skb(len, dst_group);
+ skb = netlink_alloc_large_skb(len);
if (skb == NULL)
goto out;
--
2.1.4