Track per-peer fragment memory usage, using the existing per-fqdir
memory tracking logic.
Signed-off-by: Richard Gobert <[email protected]>
---
include/net/inet_frag.h | 11 ++------
include/net/inetpeer.h | 1 +
net/ieee802154/6lowpan/reassembly.c | 2 +-
net/ipv4/inet_fragment.c | 36 ++++++++++++++++++++-----
net/ipv4/inetpeer.c | 1 +
net/ipv4/ip_fragment.c | 4 +--
net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
net/ipv6/reassembly.c | 2 +-
8 files changed, 38 insertions(+), 21 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 05d95fad8a1a..077a0ec78a58 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -155,15 +155,8 @@ static inline long frag_mem_limit(const struct fqdir *fqdir)
return atomic_long_read(&fqdir->mem);
}
-static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
-{
- atomic_long_sub(val, &fqdir->mem);
-}
-
-static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
-{
- atomic_long_add(val, &fqdir->mem);
-}
+void sub_frag_mem_limit(struct inet_frag_queue *q, long val);
+void add_frag_mem_limit(struct inet_frag_queue *q, long val);
/* RFC 3168 support :
* We want to check ECN values of all fragments, do detect invalid combinations.
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 74ff688568a0..1c602a706742 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,6 +41,7 @@ struct inet_peer {
u32 rate_tokens; /* rate limiting for ICMP */
u32 n_redirects;
unsigned long rate_last;
+ atomic_long_t frag_mem;
/*
* Once inet_peer is queued for deletion (refcnt == 0), following field
* is not available: rid
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index a91283d1e5bf..0bf207e94082 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -135,7 +135,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
fq->q.flags |= INET_FRAG_FIRST_IN;
fq->q.meat += skb->len;
- add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+ add_frag_mem_limit(&fq->q, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c3ec1dbe7081..8b8d77d548d4 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -250,6 +250,29 @@ void inet_frag_kill(struct inet_frag_queue *fq)
}
EXPORT_SYMBOL(inet_frag_kill);
+static inline long peer_mem_limit(const struct inet_frag_queue *q)
+{
+ if (!q->peer)
+ return 0;
+ return atomic_long_read(&q->peer->frag_mem);
+}
+
+void sub_frag_mem_limit(struct inet_frag_queue *q, long val)
+{
+ if (q->peer)
+ atomic_long_sub(val, &q->peer->frag_mem);
+ atomic_long_sub(val, &q->fqdir->mem);
+}
+EXPORT_SYMBOL(sub_frag_mem_limit);
+
+void add_frag_mem_limit(struct inet_frag_queue *q, long val)
+{
+ if (q->peer)
+ atomic_long_add(val, &q->peer->frag_mem);
+ atomic_long_add(val, &q->fqdir->mem);
+}
+EXPORT_SYMBOL(add_frag_mem_limit);
+
static void inet_frag_destroy_rcu(struct rcu_head *head)
{
struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
@@ -306,9 +329,8 @@ void inet_frag_destroy(struct inet_frag_queue *q)
sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
sum = sum_truesize + f->qsize;
+ sub_frag_mem_limit(q, sum);
inet_frag_free(q);
-
- sub_frag_mem_limit(fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
@@ -324,7 +346,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
q->fqdir = fqdir;
f->constructor(q, arg);
- add_frag_mem_limit(fqdir, f->qsize);
+ add_frag_mem_limit(q, f->qsize);
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
@@ -483,7 +505,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
delta += head->truesize;
if (delta)
- add_frag_mem_limit(q->fqdir, delta);
+ add_frag_mem_limit(q, delta);
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
@@ -505,7 +527,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
head->truesize += clone->truesize;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(q->fqdir, clone->truesize);
+ add_frag_mem_limit(q, clone->truesize);
skb_shinfo(head)->frag_list = clone;
nextp = &clone->next;
} else {
@@ -575,7 +597,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
rbn = rbnext;
}
}
- sub_frag_mem_limit(q->fqdir, sum_truesize);
+ sub_frag_mem_limit(q, sum_truesize);
*nextp = NULL;
skb_mark_not_on_list(head);
@@ -604,7 +626,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
if (head == q->fragments_tail)
q->fragments_tail = NULL;
- sub_frag_mem_limit(q->fqdir, head->truesize);
+ sub_frag_mem_limit(q, head->truesize);
return head;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e9fed83e9b3c..6e7325dba417 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
p->dtime = (__u32)jiffies;
refcount_set(&p->refcnt, 2);
atomic_set(&p->rid, 0);
+ atomic_long_set(&p->frag_mem, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
p->n_redirects = 0;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d0c22c41cf26..e35061f6aadb 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -242,7 +242,7 @@ static int ip_frag_reinit(struct ipq *qp)
}
sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
- sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
qp->q.flags = 0;
qp->q.len = 0;
@@ -339,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.mono_delivery_time = skb->mono_delivery_time;
qp->q.meat += skb->len;
qp->ecn |= ecn;
- add_frag_mem_limit(qp->q.fqdir, skb->truesize);
+ add_frag_mem_limit(&qp->q, skb->truesize);
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 7dd3629dd19e..11ce2335c584 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -269,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ff866f2a879e..cd4ba6cc956b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -197,7 +197,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->q.mono_delivery_time = skb->mono_delivery_time;
fq->q.meat += skb->len;
fq->ecn |= ecn;
- add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+ add_frag_mem_limit(&fq->q, skb->truesize);
fragsize = -skb_network_offset(skb) + skb->len;
if (fragsize > fq->q.max_size)
--
2.36.1
On Mon, Aug 29, 2022 at 4:48 AM Richard Gobert <[email protected]> wrote:
>
> Track per-peer fragment memory usage, using the existing per-fqdir
> memory tracking logic.
This is a rather terse changelog.
We tried to get rid of any dependence on inetpeer, which is not
resistant to DDoS attacks.
So I would not add a new dependency.
Also, tracking memory per peer will not really help in the case of bursts?
>
> Signed-off-by: Richard Gobert <[email protected]>
> ---
> include/net/inet_frag.h | 11 ++------
> include/net/inetpeer.h | 1 +
> net/ieee802154/6lowpan/reassembly.c | 2 +-
> net/ipv4/inet_fragment.c | 36 ++++++++++++++++++++-----
> net/ipv4/inetpeer.c | 1 +
> net/ipv4/ip_fragment.c | 4 +--
> net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
> net/ipv6/reassembly.c | 2 +-
> 8 files changed, 38 insertions(+), 21 deletions(-)
>
> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
> index 05d95fad8a1a..077a0ec78a58 100644
> --- a/include/net/inet_frag.h
> +++ b/include/net/inet_frag.h
> @@ -155,15 +155,8 @@ static inline long frag_mem_limit(const struct fqdir *fqdir)
> return atomic_long_read(&fqdir->mem);
> }
>
> -static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
> -{
> - atomic_long_sub(val, &fqdir->mem);
> -}
> -
> -static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
> -{
> - atomic_long_add(val, &fqdir->mem);
> -}
> +void sub_frag_mem_limit(struct inet_frag_queue *q, long val);
> +void add_frag_mem_limit(struct inet_frag_queue *q, long val);
>
> /* RFC 3168 support :
> * We want to check ECN values of all fragments, do detect invalid combinations.
> diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
> index 74ff688568a0..1c602a706742 100644
> --- a/include/net/inetpeer.h
> +++ b/include/net/inetpeer.h
> @@ -41,6 +41,7 @@ struct inet_peer {
> u32 rate_tokens; /* rate limiting for ICMP */
> u32 n_redirects;
> unsigned long rate_last;
> + atomic_long_t frag_mem;
> /*
> * Once inet_peer is queued for deletion (refcnt == 0), following field
> * is not available: rid
> diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
> index a91283d1e5bf..0bf207e94082 100644
> --- a/net/ieee802154/6lowpan/reassembly.c
> +++ b/net/ieee802154/6lowpan/reassembly.c
> @@ -135,7 +135,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
> fq->q.flags |= INET_FRAG_FIRST_IN;
>
> fq->q.meat += skb->len;
> - add_frag_mem_limit(fq->q.fqdir, skb->truesize);
> + add_frag_mem_limit(&fq->q, skb->truesize);
>
> if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
> fq->q.meat == fq->q.len) {
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index c3ec1dbe7081..8b8d77d548d4 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
> @@ -250,6 +250,29 @@ void inet_frag_kill(struct inet_frag_queue *fq)
> }
> EXPORT_SYMBOL(inet_frag_kill);
>
> +static inline long peer_mem_limit(const struct inet_frag_queue *q)
> +{
> + if (!q->peer)
> + return 0;
> + return atomic_long_read(&q->peer->frag_mem);
> +}
> +
> +void sub_frag_mem_limit(struct inet_frag_queue *q, long val)
> +{
> + if (q->peer)
> + atomic_long_sub(val, &q->peer->frag_mem);
> + atomic_long_sub(val, &q->fqdir->mem);
> +}
> +EXPORT_SYMBOL(sub_frag_mem_limit);
> +
> +void add_frag_mem_limit(struct inet_frag_queue *q, long val)
> +{
> + if (q->peer)
> + atomic_long_add(val, &q->peer->frag_mem);
> + atomic_long_add(val, &q->fqdir->mem);
> +}
> +EXPORT_SYMBOL(add_frag_mem_limit);
> +
> static void inet_frag_destroy_rcu(struct rcu_head *head)
> {
> struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
> @@ -306,9 +329,8 @@ void inet_frag_destroy(struct inet_frag_queue *q)
> sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
> sum = sum_truesize + f->qsize;
>
> + sub_frag_mem_limit(q, sum);
> inet_frag_free(q);
> -
> - sub_frag_mem_limit(fqdir, sum);
> }
> EXPORT_SYMBOL(inet_frag_destroy);
>
> @@ -324,7 +346,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
>
> q->fqdir = fqdir;
> f->constructor(q, arg);
> - add_frag_mem_limit(fqdir, f->qsize);
> + add_frag_mem_limit(q, f->qsize);
>
> timer_setup(&q->timer, f->frag_expire, 0);
> spin_lock_init(&q->lock);
> @@ -483,7 +505,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
>
> delta += head->truesize;
> if (delta)
> - add_frag_mem_limit(q->fqdir, delta);
> + add_frag_mem_limit(q, delta);
>
> /* If the first fragment is fragmented itself, we split
> * it to two chunks: the first with data and paged part
> @@ -505,7 +527,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
> head->truesize += clone->truesize;
> clone->csum = 0;
> clone->ip_summed = head->ip_summed;
> - add_frag_mem_limit(q->fqdir, clone->truesize);
> + add_frag_mem_limit(q, clone->truesize);
> skb_shinfo(head)->frag_list = clone;
> nextp = &clone->next;
> } else {
> @@ -575,7 +597,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
> rbn = rbnext;
> }
> }
> - sub_frag_mem_limit(q->fqdir, sum_truesize);
> + sub_frag_mem_limit(q, sum_truesize);
>
> *nextp = NULL;
> skb_mark_not_on_list(head);
> @@ -604,7 +626,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
> if (head == q->fragments_tail)
> q->fragments_tail = NULL;
>
> - sub_frag_mem_limit(q->fqdir, head->truesize);
> + sub_frag_mem_limit(q, head->truesize);
>
> return head;
> }
> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
> index e9fed83e9b3c..6e7325dba417 100644
> --- a/net/ipv4/inetpeer.c
> +++ b/net/ipv4/inetpeer.c
> @@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
> p->dtime = (__u32)jiffies;
> refcount_set(&p->refcnt, 2);
> atomic_set(&p->rid, 0);
> + atomic_long_set(&p->frag_mem, 0);
> p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
> p->rate_tokens = 0;
> p->n_redirects = 0;
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index d0c22c41cf26..e35061f6aadb 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -242,7 +242,7 @@ static int ip_frag_reinit(struct ipq *qp)
> }
>
> sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
> - sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
> + sub_frag_mem_limit(&qp->q, sum_truesize);
>
> qp->q.flags = 0;
> qp->q.len = 0;
> @@ -339,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
> qp->q.mono_delivery_time = skb->mono_delivery_time;
> qp->q.meat += skb->len;
> qp->ecn |= ecn;
> - add_frag_mem_limit(qp->q.fqdir, skb->truesize);
> + add_frag_mem_limit(&qp->q, skb->truesize);
> if (offset == 0)
> qp->q.flags |= INET_FRAG_FIRST_IN;
>
> diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
> index 7dd3629dd19e..11ce2335c584 100644
> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -269,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
> fq->ecn |= ecn;
> if (payload_len > fq->q.max_size)
> fq->q.max_size = payload_len;
> - add_frag_mem_limit(fq->q.fqdir, skb->truesize);
> + add_frag_mem_limit(&fq->q, skb->truesize);
>
> /* The first fragment.
> * nhoffset is obtained from the first fragment, of course.
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index ff866f2a879e..cd4ba6cc956b 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -197,7 +197,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
> fq->q.mono_delivery_time = skb->mono_delivery_time;
> fq->q.meat += skb->len;
> fq->ecn |= ecn;
> - add_frag_mem_limit(fq->q.fqdir, skb->truesize);
> + add_frag_mem_limit(&fq->q, skb->truesize);
>
> fragsize = -skb_network_offset(skb) + skb->len;
> if (fragsize > fq->q.max_size)
> --
> 2.36.1
>
On Mon, Aug 29, 2022 at 03:15:47PM -0700, Eric Dumazet wrote:
> We tried to get rid of any dependence over inetpeer, which is not
> resistant against DDOS attacks.
>
> So I would not add a new dependency.
I see your point. What do you suggest doing differently?
The inetpeer mechanism is used for IPv4 frags. If it isn't resistant
against DDoS attacks, can it perhaps be improved?
On Thu, Sep 1, 2022 at 8:03 AM Richard Gobert <[email protected]> wrote:
>
> On Mon, Aug 29, 2022 at 03:15:47PM -0700, Eric Dumazet wrote:
> > We tried to get rid of any dependence over inetpeer, which is not
> > resistant against DDOS attacks.
> >
> > So I would not add a new dependency.
>
> I see your point. What do you suggest doing differently?
>
> The inetpeer mechanism is used for IPv4 frags. If it isn't resistant
> against DDoS attacks, can it perhaps be improved?
It can be disabled if needed, by changing the ipfrag_max_dist sysctl.
Quite frankly, the IPv4 reassembly unit is a toy; I am always surprised
that some applications are still relying on IP fragments.
On Thu, Sep 01, 2022 at 09:06:59AM -0700, Eric Dumazet wrote:
> It can be disabled if needed, by changing ipfrag_max_dist sysctl.
I understand your reluctance to add another dependency on inetpeer.
> Quite frankly IPv4 reassembly unit is a toy, I am always surprised
> some applications are still relying on IP fragments.
Do you think there's any room for improvement in IP fragment handling? I
believe that it is possible to make frags less fragile and less prone
to overload in real-world scenarios.