2018-07-22 14:37:22

by shaochun chen

[permalink] [raw]
Subject: [PATCH] netlink: fix memory leak of dump

1) if netlink_dump_start start fail, the memory of c->data will leak.
so free manually after netlink_dump_start return error.

2) In netlink_dump_start, ignore the return of netlink_dump.
Because if cb_running is set to true, cb->dump will be call in anyway.
so if netlink_dump_start start successfully, just return -EINTR.

Signed-off-by: Shaochun Chen <[email protected]>
---
net/netfilter/nf_conntrack_netlink.c | 8 ++++--
net/netfilter/nf_tables_api.c | 41 ++++++++++++++++++++--------
net/netfilter/nfnetlink_acct.c | 8 ++++--
net/netlink/af_netlink.c | 5 +---
4 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 20a2e37c76d1..31db758bdb7a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1243,17 +1243,19 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
.dump = ctnetlink_dump_table,
.done = ctnetlink_done,
};
+ struct ctnetlink_filter *filter = NULL;

if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
- struct ctnetlink_filter *filter;
-
filter = ctnetlink_alloc_filter(cda);
if (IS_ERR(filter))
return PTR_ERR(filter);

c.data = filter;
}
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ err = netlink_dump_start(ctnl, skb, nlh, &c);
+ if (err != -EINTR && filter)
+ kfree(filter);
+ return err;
}

err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..4635a841f5f2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2287,10 +2287,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
.done = nf_tables_dump_rules_done,
.module = THIS_MODULE,
};
+ struct nft_rule_dump_ctx *ctx = NULL;

if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
- struct nft_rule_dump_ctx *ctx;
-
ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
if (!ctx)
return -ENOMEM;
@@ -2315,7 +2314,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
c.data = ctx;
}

- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ err = nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ if (err != -EINTR && ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
+ return err;
}

table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@@ -3201,7 +3206,10 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
*ctx_dump = ctx;
c.data = ctx_dump;

- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ err = nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ if (err != -EINTR)
+ kfree(ctx_dump);
+ return err;
}

/* Only accept unspec with dump */
@@ -4016,7 +4024,10 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
dump_ctx->ctx = ctx;

c.data = dump_ctx;
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ err = nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ if (err != -EINTR)
+ kfree(dump_ctx);
+ return err;
}

if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
@@ -5031,18 +5042,22 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
};
+ struct nft_obj_filter *filter = NULL;

if (nla[NFTA_OBJ_TABLE] ||
nla[NFTA_OBJ_TYPE]) {
- struct nft_obj_filter *filter;
-
filter = nft_obj_filter_alloc(nla);
if (IS_ERR(filter))
return -ENOMEM;

c.data = filter;
}
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ err = nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ if (err != -EINTR && filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
+ return err;
}

if (!nla[NFTA_OBJ_NAME] ||
@@ -5704,17 +5719,21 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
.done = nf_tables_dump_flowtable_done,
.module = THIS_MODULE,
};
+ struct nft_flowtable_filter *filter = NULL;

if (nla[NFTA_FLOWTABLE_TABLE]) {
- struct nft_flowtable_filter *filter;
-
filter = nft_flowtable_filter_alloc(nla);
if (IS_ERR(filter))
return -ENOMEM;

c.data = filter;
}
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ err = nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ if (err != -EINTR && filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
+ return err;
}

if (!nla[NFTA_FLOWTABLE_NAME])
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index a0e5adf0b3b6..aecdb9cc1c5d 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -277,17 +277,19 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
.dump = nfnl_acct_dump,
.done = nfnl_acct_done,
};
+ struct nfacct_filter *filter = NULL;

if (tb[NFACCT_FILTER]) {
- struct nfacct_filter *filter;
-
filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
if (IS_ERR(filter))
return PTR_ERR(filter);

c.data = filter;
}
- return netlink_dump_start(nfnl, skb, nlh, &c);
+ ret = netlink_dump_start(nfnl, skb, nlh, &c);
+ if (ret != -EINTR && filter)
+ kfree(filter);
+ return ret;
}

if (!tb[NFACCT_NAME])
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 393573a99a5a..61450461378c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2320,13 +2320,10 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,

mutex_unlock(nlk->cb_mutex);

- ret = netlink_dump(sk);
+ netlink_dump(sk);

sock_put(sk);

- if (ret)
- return ret;
-
/* We successfully started a dump, by returning -EINTR we
* signal not to send ACK even if it was requested.
*/
--
2.17.1



2018-07-22 16:40:57

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

Shaochun Chen <[email protected]> wrote:

[ CC ing Tom, as he added ->start() ]

> 1) if netlink_dump_start start fail, the memory of c->data will leak.
> so free manually after netlink_dump_start return error.
>
> 2) In netlink_dump_start, ignore the return of netlink_dump.
> Because if cb_running is set to true, cb->dump will be call in anyway.
> so if netlink_dump_start start successfully, just return -EINTR.

This now requires ->start() to not return -EINVAL, else we won't
release resources either, this seems fragile to me.

I can only think of three ways to address this:

1. Kill ->start() and force all users to defer
state allocation to first ->dump() invocation, so existing
->done() can be used to release resources.

OR
2. add ->stop() and have core always pair it with ->start(),
no matter if dump() got called or not, then convert all
places that provide .start to use .stop, not .done.

OR
3. change meaning of ->done() so its always called once ->start()
was invoked (and returned 0), this requires audit of all
places that provide .done to make sure they won't trip.

3) seems to be what Tom intended when he added .start, so probably
best to investigate that first.

2018-07-22 17:09:52

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

From: Florian Westphal <[email protected]>
Date: Sun, 22 Jul 2018 18:39:25 +0200

> 3. change meaning of ->done() so its always called once ->start()
> was invoked (and returned 0), this requires audit of all
> places that provide .done to make sure they won't trip.
>
> 3) seems to be what Tom intended when he added .start, so probably
> best to investigate that first.

Hmmm...

Any time ->start() succeeds, we set cb_running to true.

From that point forward, ->done() will be called at some point at all
of the locations that check if cb_running is true and set it to false.

This may be deferred all the way to socket destruction, but it will
eventually happens.

Nothing sets cb_running to false without invoking the ->done()
callback.

Just because the individual dump invocations don't call ->done() does
not mean it will not eventually happen. The dump callbacks, and thus
the state data, is live across multiple rounds of recvmsg() calls by
the user and that is as-designed.

2018-07-22 18:10:40

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

David Miller <[email protected]> wrote:
> From: Florian Westphal <[email protected]>
> Date: Sun, 22 Jul 2018 18:39:25 +0200
>
> > 3. change meaning of ->done() so its always called once ->start()
> > was invoked (and returned 0), this requires audit of all
> > places that provide .done to make sure they won't trip.
> >
> > 3) seems to be what Tom intended when he added .start, so probably
> > best to investigate that first.
>
> Hmmm...
>
> Any time ->start() succeeds, we set cb_running to true.

Right.

> From that point forward, ->done() will be called at some point at all
> of the locations that check if cb_running is true and set it to false.

Also right, thanks for pointing this out, I missed fact that netlink
core restarts a dump after this.

So 3) is already true which means we should try to see if we can move
all dump-related extra magic into ->start().

Shaochun, can you see if this is possible?

Something along these lines (totally untested), which makes this
a netfilter fix:

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
return filter;
}

+static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *nla = cb->data;
+ struct nft_obj_filter *filter = NULL;
+
+ if (nla[NFTA_OBJ_TABLE] ||
+ nla[NFTA_OBJ_TYPE]) {
+ filter = nft_obj_filter_alloc(nla);
+ if (IS_ERR(filter))
+ return -ENOMEM;
+ }
+
+ cb->data = filter;
+ return 0;
+}
+
/* called with rcu_read_lock held */
static int nf_tables_getobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
@@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,

if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_obj_start,
.dump = nf_tables_dump_obj,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};

- if (nla[NFTA_OBJ_TABLE] ||
- nla[NFTA_OBJ_TYPE]) {
- struct nft_obj_filter *filter;
-
- filter = nft_obj_filter_alloc(nla);
- if (IS_ERR(filter))
- return -ENOMEM;
-
- c.data = filter;
- }
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}


2018-07-23 02:06:27

by shaochun chen

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

allocate memory in cb->start(), which means passing 'static' variable
through control->data,
then allocate memory in cb->start() according to cb->data (cb->data is
equal to control->data now),
and set the memory back to cb->data which will be used in cb->dump().
It's a bit complicated, please see nf_tables_getset.

2018-07-23 2:09 GMT+08:00 Florian Westphal <[email protected]>:

> David Miller <[email protected]> wrote:
> > From: Florian Westphal <[email protected]>
> > Date: Sun, 22 Jul 2018 18:39:25 +0200
> >
> > > 3. change meaning of ->done() so its always called once ->start()
> > > was invoked (and returned 0), this requires audit of all
> > > places that provide .done to make sure they won't trip.
> > >
> > > 3) seems to be what Tom intended when he added .start, so probably
> > > best to investigate that first.
> >
> > Hmmm...
> >
> > Any time ->start() succeeds, we set cb_running to true.
>
> Right.
>
> > From that point forward, ->done() will be called at some point at all
> > of the locations that check if cb_running is true and set it to false.
>
> Also right, thanks for pointing this out, I missed fact that netlink
> core restarts a dump after this.
>
> So 3) is already true which means we should try to see if we can move
> all dump-related extra magic into ->start().
>
> Shaochun, can you see if this is possible?
>
> Something along these lines (totally untested), which makes this
> a netfilter fix:
>
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr * const
> nla[])
> return filter;
> }
>
> +static int nf_tables_dump_obj_start(struct netlink_callback *cb)
> +{
> + const struct nlattr * const *nla = cb->data;
> + struct nft_obj_filter *filter = NULL;
> +
> + if (nla[NFTA_OBJ_TABLE] ||
> + nla[NFTA_OBJ_TYPE]) {
> + filter = nft_obj_filter_alloc(nla);
> + if (IS_ERR(filter))
> + return -ENOMEM;
> + }
> +
> + cb->data = filter;
> + return 0;
> +}
> +
> /* called with rcu_read_lock held */
> static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> struct sk_buff *skb, const struct nlmsghdr
> *nlh,
> @@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net,
> struct sock *nlsk,
>
> if (nlh->nlmsg_flags & NLM_F_DUMP) {
> struct netlink_dump_control c = {
> + .start = nf_tables_dump_obj_start,
> .dump = nf_tables_dump_obj,
> .done = nf_tables_dump_obj_done,
> .module = THIS_MODULE,
> + .data = (void *)nla,
> };
>
> - if (nla[NFTA_OBJ_TABLE] ||
> - nla[NFTA_OBJ_TYPE]) {
> - struct nft_obj_filter *filter;
> -
> - filter = nft_obj_filter_alloc(nla);
> - if (IS_ERR(filter))
> - return -ENOMEM;
> -
> - c.data = filter;
> - }
> return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
> }
>
>

2018-07-23 09:21:27

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

On Sun, Jul 22, 2018 at 08:09:10PM +0200, Florian Westphal wrote:
> David Miller <[email protected]> wrote:
> > From: Florian Westphal <[email protected]>
> > Date: Sun, 22 Jul 2018 18:39:25 +0200
> >
> > > 3. change meaning of ->done() so its always called once ->start()
> > > was invoked (and returned 0), this requires audit of all
> > > places that provide .done to make sure they won't trip.
> > >
> > > 3) seems to be what Tom intended when he added .start, so probably
> > > best to investigate that first.
> >
> > Hmmm...
> >
> > Any time ->start() succeeds, we set cb_running to true.
>
> Right.
>
> > From that point forward, ->done() will be called at some point at all
> > of the locations that check if cb_running is true and set it to false.
>
> Also right, thanks for pointing this out, I missed fact that netlink
> core restarts a dump after this.
>
> So 3) is already true which means we should try to see if we can move
> all dump-related extra magic into ->start().
>
> Shaochun, can you see if this is possible?
>
> Something along these lines (totally untested), which makes this
> a netfilter fix:
>
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
> return filter;
> }
>
> +static int nf_tables_dump_obj_start(struct netlink_callback *cb)
> +{
> + const struct nlattr * const *nla = cb->data;
> + struct nft_obj_filter *filter = NULL;
> +
> + if (nla[NFTA_OBJ_TABLE] ||
> + nla[NFTA_OBJ_TYPE]) {
> + filter = nft_obj_filter_alloc(nla);
> + if (IS_ERR(filter))
> + return -ENOMEM;
> + }
> +
> + cb->data = filter;
> + return 0;
> +}
> +
> /* called with rcu_read_lock held */
> static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> struct sk_buff *skb, const struct nlmsghdr *nlh,
> @@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
>
> if (nlh->nlmsg_flags & NLM_F_DUMP) {
> struct netlink_dump_control c = {
> + .start = nf_tables_dump_obj_start,
> .dump = nf_tables_dump_obj,
> .done = nf_tables_dump_obj_done,
> .module = THIS_MODULE,
> + .data = (void *)nla,

You cannot do this.

nla is allocated in this stack.

the nla will not be available in the second recv(), it won't be there.

2018-07-23 09:30:02

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

Pablo Neira Ayuso <[email protected]> wrote:
> > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> > --- a/net/netfilter/nf_tables_api.c
> > +++ b/net/netfilter/nf_tables_api.c
> > @@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
> > return filter;
> > }
> >
> > +static int nf_tables_dump_obj_start(struct netlink_callback *cb)
> > +{
> > + const struct nlattr * const *nla = cb->data;

On-Stack input.
I can't see how its wrong, ->start() happens from same context as
netlink_dump_start so its valid.

> > + struct nft_obj_filter *filter = NULL;
> > +
> > + if (nla[NFTA_OBJ_TABLE] ||
> > + nla[NFTA_OBJ_TYPE]) {
> > + filter = nft_obj_filter_alloc(nla);
> > + if (IS_ERR(filter))
> > + return -ENOMEM;
> > + }
> > +
> > + cb->data = filter;

And this replaced the on-stack input with dynamically
allocated one, which will be free'd via ->done().

> > /* called with rcu_read_lock held */
> > static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> > struct sk_buff *skb, const struct nlmsghdr *nlh,
> > @@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> >
> > if (nlh->nlmsg_flags & NLM_F_DUMP) {
> > struct netlink_dump_control c = {
> > + .start = nf_tables_dump_obj_start,
> > .dump = nf_tables_dump_obj,
> > .done = nf_tables_dump_obj_done,
> > .module = THIS_MODULE,
> > + .data = (void *)nla,
>
> You cannot do this.
>
> nla is allocated in this stack.

Yes.

> the nla will not be available in the second recv(), it won't be there.

Its replaced in ->start().

As David pointed out, once ->start() returns 0 we set cb_running, i.e.
only after successful ->start() netlink core will call ->dump() again.

So I see no problem setting ->data to onstack cookie and then
duplicating it to heap via kmemdup in ->start().

As far as I can see netlink core offers all functionality already,
so we only need to switch netfilter to make use of it.

If you disagree please let me know, otherwise I will cook up
a patch along this pattern for net/netfilter/*.

Thanks.

2018-07-23 09:40:46

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

On Mon, Jul 23, 2018 at 11:28:18AM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <[email protected]> wrote:
> > > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> > > --- a/net/netfilter/nf_tables_api.c
> > > +++ b/net/netfilter/nf_tables_api.c
> > > @@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
> > > return filter;
> > > }
> > >
> > > +static int nf_tables_dump_obj_start(struct netlink_callback *cb)
> > > +{
> > > + const struct nlattr * const *nla = cb->data;
>
> On-Stack input.
> I can't see how its wrong, ->start() happens from same context as
> netlink_dump_start so its valid.
>
> > > + struct nft_obj_filter *filter = NULL;
> > > +
> > > + if (nla[NFTA_OBJ_TABLE] ||
> > > + nla[NFTA_OBJ_TYPE]) {
> > > + filter = nft_obj_filter_alloc(nla);
> > > + if (IS_ERR(filter))
> > > + return -ENOMEM;
> > > + }
> > > +
> > > + cb->data = filter;
>
> And this replaced the on-stack input with dynamically
> allocated one, which will be free'd via ->done().
>
> > > /* called with rcu_read_lock held */
> > > static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> > > struct sk_buff *skb, const struct nlmsghdr *nlh,
> > > @@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> > >
> > > if (nlh->nlmsg_flags & NLM_F_DUMP) {
> > > struct netlink_dump_control c = {
> > > + .start = nf_tables_dump_obj_start,
> > > .dump = nf_tables_dump_obj,
> > > .done = nf_tables_dump_obj_done,
> > > .module = THIS_MODULE,
> > > + .data = (void *)nla,
> >
> > You cannot do this.
> >
> > nla is allocated in this stack.
>
> Yes.
>
> > the nla will not be available in the second recv(), it won't be there.
>
> Its replaced in ->start().
>
> As David pointed out, once ->start() returns 0 we set cb_running, i.e.
> only after successful ->start() netlink core will call ->dump() again.
>
> So I see no problem setting ->data to onstack cookie and then
> duplicating it to heap via kmemdup in ->start().
>
> As far as I can see netlink core offers all functionality already,
> so we only need to switch netfilter to make use of it.
>
> If you disagree please let me know, otherwise I will cook up
> a patch along this pattern for net/netfilter/*.

Why not just call ->done from netlink_dump_start() when it fails?

2018-07-23 09:44:14

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

Pablo Neira Ayuso <[email protected]> wrote:
> > As David pointed out, once ->start() returns 0 we set cb_running, i.e.
> > only after successful ->start() netlink core will call ->dump() again.
> >
> > So I see no problem setting ->data to onstack cookie and then
> > duplicating it to heap via kmemdup in ->start().
> >
> > As far as I can see netlink core offers all functionality already,
> > so we only need to switch netfilter to make use of it.
> >
> > If you disagree please let me know, otherwise I will cook up
> > a patch along this pattern for net/netfilter/*.
>
> Why not just call ->done from netlink_dump_start() when it fails?

Not sure thats safe for all users, we will also still need to call
it in nft_netlink_dump_start and we need to play guess game wrt
EINTR (which can mean 'dump was now started, do not send ack').

2018-07-23 09:49:09

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

On Mon, Jul 23, 2018 at 11:42:28AM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <[email protected]> wrote:
> > > As David pointed out, once ->start() returns 0 we set cb_running, i.e.
> > > only after successful ->start() netlink core will call ->dump() again.
> > >
> > > So I see no problem setting ->data to onstack cookie and then
> > > duplicating it to heap via kmemdup in ->start().
> > >
> > > As far as I can see netlink core offers all functionality already,
> > > so we only need to switch netfilter to make use of it.
> > >
> > > If you disagree please let me know, otherwise I will cook up
> > > a patch along this pattern for net/netfilter/*.
> >
> > Why not just call ->done from netlink_dump_start() when it fails?
>
> Not sure thats safe for all users, we will also still need to call
> it in nft_netlink_dump_start and we need to play guess game wrt
> EINTR (which can mean 'dump was now started, do not send ack').

We can also add another scratchpad area, similar to the ->cb[x] area
that can be initialized before netlink_dump_start()? So we don't need
the data pointer.

By passing the array of attributes, we'll need to do attribute parsing
over and over again from each netlink_dump() call.

2018-07-23 09:53:48

by shaochun chen

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

allocate memory in ->start(), it's not convenient for users.
if call ->done() isn't ok for clean memory when netlink_dump_start() fail,
maybe we should have another function ->clean() to clean memory.

2018-07-23 17:28 GMT+08:00 Florian Westphal <[email protected]>:

> Pablo Neira Ayuso <[email protected]> wrote:
> > > diff --git a/net/netfilter/nf_tables_api.c
> b/net/netfilter/nf_tables_api.c
> > > --- a/net/netfilter/nf_tables_api.c
> > > +++ b/net/netfilter/nf_tables_api.c
> > > @@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr *
> const nla[])
> > > return filter;
> > > }
> > >
> > > +static int nf_tables_dump_obj_start(struct netlink_callback *cb)
> > > +{
> > > + const struct nlattr * const *nla = cb->data;
>
> On-Stack input.
> I can't see how its wrong, ->start() happens from same context as
> netlink_dump_start so its valid.
>
> > > + struct nft_obj_filter *filter = NULL;
> > > +
> > > + if (nla[NFTA_OBJ_TABLE] ||
> > > + nla[NFTA_OBJ_TYPE]) {
> > > + filter = nft_obj_filter_alloc(nla);
> > > + if (IS_ERR(filter))
> > > + return -ENOMEM;
> > > + }
> > > +
> > > + cb->data = filter;
>
> And this replaced the on-stack input with dynamically
> allocated one, which will be free'd via ->done().
>
> > > /* called with rcu_read_lock held */
> > > static int nf_tables_getobj(struct net *net, struct sock *nlsk,
> > > struct sk_buff *skb, const struct nlmsghdr
> *nlh,
> > > @@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net,
> struct sock *nlsk,
> > >
> > > if (nlh->nlmsg_flags & NLM_F_DUMP) {
> > > struct netlink_dump_control c = {
> > > + .start = nf_tables_dump_obj_start,
> > > .dump = nf_tables_dump_obj,
> > > .done = nf_tables_dump_obj_done,
> > > .module = THIS_MODULE,
> > > + .data = (void *)nla,
> >
> > You cannot do this.
> >
> > nla is allocated in this stack.
>
> Yes.
>
> > the nla will not be available in the second recv(), it won't be there.
>
> Its replaced in ->start().
>
> As David pointed out, once ->start() returns 0 we set cb_running, i.e.
> only after successful ->start() netlink core will call ->dump() again.
>
> So I see no problem setting ->data to onstack cookie and then
> duplicating it to heap via kmemdup in ->start().
>
> As far as I can see netlink core offers all functionality already,
> so we only need to switch netfilter to make use of it.
>
> If you disagree please let me know, otherwise I will cook up
> a patch along this pattern for net/netfilter/*.
>
> Thanks.
>

2018-07-23 10:36:23

by shaochun chen

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

I have a question: we will try_module_get in __netlink_dump_start(),
but why we need to call try_module_get again in nft_netlink_dump_start ??

2018-07-23 17:52 GMT+08:00 shaochun chen <[email protected]>:

> allocate memory in ->start(), it's not convenient for users.
> if call ->done() isn't ok for clean memory when netlink_dump_start() fail,
> maybe we should have another function ->clean() to clean memory.
>
> 2018-07-23 17:28 GMT+08:00 Florian Westphal <[email protected]>:
>
>> Pablo Neira Ayuso <[email protected]> wrote:
>> > > diff --git a/net/netfilter/nf_tables_api.c
>> b/net/netfilter/nf_tables_api.c
>> > > --- a/net/netfilter/nf_tables_api.c
>> > > +++ b/net/netfilter/nf_tables_api.c
>> > > @@ -5010,6 +5013,22 @@ nft_obj_filter_alloc(const struct nlattr *
>> const nla[])
>> > > return filter;
>> > > }
>> > >
>> > > +static int nf_tables_dump_obj_start(struct netlink_callback *cb)
>> > > +{
>> > > + const struct nlattr * const *nla = cb->data;
>>
>> On-Stack input.
>> I can't see how its wrong, ->start() happens from same context as
>> netlink_dump_start so its valid.
>>
>> > > + struct nft_obj_filter *filter = NULL;
>> > > +
>> > > + if (nla[NFTA_OBJ_TABLE] ||
>> > > + nla[NFTA_OBJ_TYPE]) {
>> > > + filter = nft_obj_filter_alloc(nla);
>> > > + if (IS_ERR(filter))
>> > > + return -ENOMEM;
>> > > + }
>> > > +
>> > > + cb->data = filter;
>>
>> And this replaced the on-stack input with dynamically
>> allocated one, which will be free'd via ->done().
>>
>> > > /* called with rcu_read_lock held */
>> > > static int nf_tables_getobj(struct net *net, struct sock *nlsk,
>> > > struct sk_buff *skb, const struct nlmsghdr
>> *nlh,
>> > > @@ -5028,21 +5047,13 @@ static int nf_tables_getobj(struct net *net,
>> struct sock *nlsk,
>> > >
>> > > if (nlh->nlmsg_flags & NLM_F_DUMP) {
>> > > struct netlink_dump_control c = {
>> > > + .start = nf_tables_dump_obj_start,
>> > > .dump = nf_tables_dump_obj,
>> > > .done = nf_tables_dump_obj_done,
>> > > .module = THIS_MODULE,
>> > > + .data = (void *)nla,
>> >
>> > You cannot do this.
>> >
>> > nla is allocated in this stack.
>>
>> Yes.
>>
>> > the nla will not be available in the second recv(), it won't be there.
>>
>> Its replaced in ->start().
>>
>> As David pointed out, once ->start() returns 0 we set cb_running, i.e.
>> only after successful ->start() netlink core will call ->dump() again.
>>
>> So I see no problem setting ->data to onstack cookie and then
>> duplicating it to heap via kmemdup in ->start().
>>
>> As far as I can see netlink core offers all functionality already,
>> so we only need to switch netfilter to make use of it.
>>
>> If you disagree please let me know, otherwise I will cook up
>> a patch along this pattern for net/netfilter/*.
>>
>> Thanks.
>>
>
>

2018-07-23 10:45:53

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

On Mon, Jul 23, 2018 at 06:34:36PM +0800, shaochun chen wrote:
> I have a question: we will try_module_get in __netlink_dump_start(),
> but why we need to call try_module_get again in nft_netlink_dump_start ??

Because they refer to two different modules. nfnetlink is multiplexing
all netfilter subsystem through one single netlink bus.

At the time that decision was made, there were concerns about netlink
running out of busses.

2018-07-23 10:53:16

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

Pablo Neira Ayuso <[email protected]> wrote:
> On Mon, Jul 23, 2018 at 11:42:28AM +0200, Florian Westphal wrote:
> We can also add another scratchpad area, similar to the ->cb[x] area
> that can be initialized before netlink_dump_start()? So we don't need
> the data pointer.
>
> By passing the array of attributes, we'll need to do attribute parsing
> over and over again from each netlink_dump() call.

Its still done once, parsing is only delayed/moved to ->start().

Now:

nla_parse
control->data = kmalloc() + init
netlink_dump_start()
->start() /* if it returns 0, done() will be called eventually
netlink_dump()
->done()
free(cb->data);

Proposed fix:
control->data = &on_stack;
netlink_dump_start()
->start()
nla_parse
cb->data = kmalloc() + init
netlink_dump()
->done()
free(cb->data);

I've submitted a patch for nf_tables, let me know if you have a better
idea or see a problem with it.

2018-07-23 11:00:30

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH] netlink: fix memory leak of dump

shaochun chen <[email protected]> wrote:
> I have a question: we will try_module_get in __netlink_dump_start(),

Thats too late, we release rcu read lock before this, so the module
implementing ->dump might have been removed already.

> but why we need to call try_module_get again in nft_netlink_dump_start ??

Its the other way around.
This is the first try_module_get; at this point we still hold rcu read
lock.

If nf_tables module is being removed, try_module_get will fail and
we can error out.

If it succeeds, its safe to drop the rcu read lock.