2023-07-21 20:48:43

by Daniel Xu

[permalink] [raw]
Subject: [PATCH bpf-next v6 2/5] netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link

This commit adds support for enabling IP defrag using pre-existing
netfilter defrag support. Basically all the flag does is bump a refcnt
while the link the active. Checks are also added to ensure the prog
requesting defrag support is run _after_ netfilter defrag hooks.

We also take care to avoid any issues w.r.t. module unloading -- while
defrag is active on a link, the module is prevented from unloading.

Signed-off-by: Daniel Xu <[email protected]>
---
include/uapi/linux/bpf.h | 5 ++
net/netfilter/nf_bpf_link.c | 123 +++++++++++++++++++++++++++++----
tools/include/uapi/linux/bpf.h | 5 ++
3 files changed, 118 insertions(+), 15 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 739c15906a65..12a5480314a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1187,6 +1187,11 @@ enum bpf_perf_event_type {
*/
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)

+/* link_create.netfilter.flags used in LINK_CREATE command for
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
+ */
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index c36da56d756f..8fe594bbc7e2 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
#include <linux/netfilter.h>

#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +25,88 @@ struct bpf_nf_link {
struct nf_hook_ops hook_ops;
struct net *net;
u32 dead;
+ const struct nf_defrag_hook *defrag_hook;
};

+static const struct nf_defrag_hook *
+get_proto_defrag_hook(struct bpf_nf_link *link,
+ const struct nf_defrag_hook __rcu *global_hook,
+ const char *mod)
+{
+ const struct nf_defrag_hook *hook;
+ int err;
+
+ /* RCU protects us from races against module unloading */
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ if (!hook) {
+ rcu_read_unlock();
+ err = request_module(mod);
+ if (err)
+ return ERR_PTR(err < 0 ? err : -EINVAL);
+
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ }
+
+ if (hook && try_module_get(hook->owner)) {
+ /* Once we have a refcnt on the module, we no longer need RCU */
+ hook = rcu_pointer_handoff(hook);
+ } else {
+ WARN_ONCE(!hook, "%s has bad registration", mod);
+ hook = ERR_PTR(-ENOENT);
+ }
+ rcu_read_unlock();
+
+ if (!IS_ERR(hook)) {
+ err = hook->enable(link->net);
+ if (err) {
+ module_put(hook->owner);
+ hook = ERR_PTR(err);
+ }
+ }
+
+ return hook;
+}
+
+static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook __maybe_unused *hook;
+
+ switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ case NFPROTO_IPV4:
+ hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ case NFPROTO_IPV6:
+ hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook *hook = link->defrag_hook;
+
+ if (!hook)
+ return;
+ hook->disable(link->net);
+ module_put(hook->owner);
+}
+
static void bpf_nf_link_release(struct bpf_link *link)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -32,11 +114,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
if (nf_link->dead)
return;

- /* prevent hook-not-found warning splat from netfilter core when
- * .detach was already called
- */
- if (!cmpxchg(&nf_link->dead, 0, 1))
+ /* do not double release in case .detach was already called */
+ if (!cmpxchg(&nf_link->dead, 0, 1)) {
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+ bpf_nf_disable_defrag(nf_link);
+ }
}

static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +174,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {

static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
+ int prio;
+
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
@@ -102,19 +186,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
return -EAFNOSUPPORT;
}

- if (attr->link_create.netfilter.flags)
+ if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;

- /* make sure conntrack confirm is always last.
- *
- * In the future, if userspace can e.g. request defrag, then
- * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
- * should fail.
- */
- switch (attr->link_create.netfilter.priority) {
- case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
- case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
- }
+ /* make sure conntrack confirm is always last */
+ prio = attr->link_create.netfilter.priority;
+ if (prio == NF_IP_PRI_FIRST)
+ return -ERANGE; /* sabotage_in and other warts */
+ else if (prio == NF_IP_PRI_LAST)
+ return -ERANGE; /* e.g. conntrack confirm */
+ else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
+ prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
+ return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */

return 0;
}
@@ -149,6 +232,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)

link->net = net;
link->dead = false;
+ link->defrag_hook = NULL;

err = bpf_link_prime(&link->link, &link_primer);
if (err) {
@@ -156,8 +240,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}

+ if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
+ err = bpf_nf_enable_defrag(link);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ }
+
err = nf_register_net_hook(net, &link->hook_ops);
if (err) {
+ bpf_nf_disable_defrag(link);
bpf_link_cleanup(&link_primer);
return err;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 739c15906a65..12a5480314a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1187,6 +1187,11 @@ enum bpf_perf_event_type {
*/
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)

+/* link_create.netfilter.flags used in LINK_CREATE command for
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
+ */
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
--
2.41.0



2023-07-28 02:33:27

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH bpf-next v6 2/5] netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link

On Fri, Jul 21, 2023 at 02:22:46PM -0600, Daniel Xu wrote:
> This commit adds support for enabling IP defrag using pre-existing
> netfilter defrag support. Basically all the flag does is bump a refcnt
> while the link the active. Checks are also added to ensure the prog
> requesting defrag support is run _after_ netfilter defrag hooks.
>
> We also take care to avoid any issues w.r.t. module unloading -- while
> defrag is active on a link, the module is prevented from unloading.
>
> Signed-off-by: Daniel Xu <[email protected]>
> ---
> include/uapi/linux/bpf.h | 5 ++
> net/netfilter/nf_bpf_link.c | 123 +++++++++++++++++++++++++++++----
> tools/include/uapi/linux/bpf.h | 5 ++
> 3 files changed, 118 insertions(+), 15 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 739c15906a65..12a5480314a2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1187,6 +1187,11 @@ enum bpf_perf_event_type {
> */
> #define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
>
> +/* link_create.netfilter.flags used in LINK_CREATE command for
> + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
> + */
> +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
> +
> /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
> * the following extensions:
> *
> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> index c36da56d756f..8fe594bbc7e2 100644
> --- a/net/netfilter/nf_bpf_link.c
> +++ b/net/netfilter/nf_bpf_link.c
> @@ -1,6 +1,8 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/bpf.h>
> #include <linux/filter.h>
> +#include <linux/kmod.h>
> +#include <linux/module.h>
> #include <linux/netfilter.h>
>
> #include <net/netfilter/nf_bpf_link.h>
> @@ -23,8 +25,88 @@ struct bpf_nf_link {
> struct nf_hook_ops hook_ops;
> struct net *net;
> u32 dead;
> + const struct nf_defrag_hook *defrag_hook;
> };
>
> +static const struct nf_defrag_hook *
> +get_proto_defrag_hook(struct bpf_nf_link *link,
> + const struct nf_defrag_hook __rcu *global_hook,
> + const char *mod)
> +{
> + const struct nf_defrag_hook *hook;
> + int err;
> +
> + /* RCU protects us from races against module unloading */
> + rcu_read_lock();
> + hook = rcu_dereference(global_hook);
> + if (!hook) {
> + rcu_read_unlock();
> + err = request_module(mod);
> + if (err)
> + return ERR_PTR(err < 0 ? err : -EINVAL);
> +
> + rcu_read_lock();
> + hook = rcu_dereference(global_hook);
> + }
> +
> + if (hook && try_module_get(hook->owner)) {
> + /* Once we have a refcnt on the module, we no longer need RCU */
> + hook = rcu_pointer_handoff(hook);
> + } else {
> + WARN_ONCE(!hook, "%s has bad registration", mod);
> + hook = ERR_PTR(-ENOENT);
> + }
> + rcu_read_unlock();
> +
> + if (!IS_ERR(hook)) {
> + err = hook->enable(link->net);
> + if (err) {
> + module_put(hook->owner);
> + hook = ERR_PTR(err);
> + }
> + }
> +
> + return hook;

The rcu + module_get logic looks correct to me, but you've dropped all Florian's acks.
What's going on?

We need explicit acks to merge this through bpf-next.

2023-07-28 04:59:38

by Daniel Xu

[permalink] [raw]
Subject: Re: [PATCH bpf-next v6 2/5] netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link

Hi Alexei,

On Thu, Jul 27, 2023 at 06:16:20PM -0700, Alexei Starovoitov wrote:
> On Fri, Jul 21, 2023 at 02:22:46PM -0600, Daniel Xu wrote:
> > This commit adds support for enabling IP defrag using pre-existing
> > netfilter defrag support. Basically all the flag does is bump a refcnt
> > while the link the active. Checks are also added to ensure the prog
> > requesting defrag support is run _after_ netfilter defrag hooks.
> >
> > We also take care to avoid any issues w.r.t. module unloading -- while
> > defrag is active on a link, the module is prevented from unloading.
> >
> > Signed-off-by: Daniel Xu <[email protected]>
> > ---
> > include/uapi/linux/bpf.h | 5 ++
> > net/netfilter/nf_bpf_link.c | 123 +++++++++++++++++++++++++++++----
> > tools/include/uapi/linux/bpf.h | 5 ++
> > 3 files changed, 118 insertions(+), 15 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 739c15906a65..12a5480314a2 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1187,6 +1187,11 @@ enum bpf_perf_event_type {
> > */
> > #define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
> >
> > +/* link_create.netfilter.flags used in LINK_CREATE command for
> > + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
> > + */
> > +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
> > +
> > /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
> > * the following extensions:
> > *
> > diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> > index c36da56d756f..8fe594bbc7e2 100644
> > --- a/net/netfilter/nf_bpf_link.c
> > +++ b/net/netfilter/nf_bpf_link.c
> > @@ -1,6 +1,8 @@
> > // SPDX-License-Identifier: GPL-2.0
> > #include <linux/bpf.h>
> > #include <linux/filter.h>
> > +#include <linux/kmod.h>
> > +#include <linux/module.h>
> > #include <linux/netfilter.h>
> >
> > #include <net/netfilter/nf_bpf_link.h>
> > @@ -23,8 +25,88 @@ struct bpf_nf_link {
> > struct nf_hook_ops hook_ops;
> > struct net *net;
> > u32 dead;
> > + const struct nf_defrag_hook *defrag_hook;
> > };
> >
> > +static const struct nf_defrag_hook *
> > +get_proto_defrag_hook(struct bpf_nf_link *link,
> > + const struct nf_defrag_hook __rcu *global_hook,
> > + const char *mod)
> > +{
> > + const struct nf_defrag_hook *hook;
> > + int err;
> > +
> > + /* RCU protects us from races against module unloading */
> > + rcu_read_lock();
> > + hook = rcu_dereference(global_hook);
> > + if (!hook) {
> > + rcu_read_unlock();
> > + err = request_module(mod);
> > + if (err)
> > + return ERR_PTR(err < 0 ? err : -EINVAL);
> > +
> > + rcu_read_lock();
> > + hook = rcu_dereference(global_hook);
> > + }
> > +
> > + if (hook && try_module_get(hook->owner)) {
> > + /* Once we have a refcnt on the module, we no longer need RCU */
> > + hook = rcu_pointer_handoff(hook);
> > + } else {
> > + WARN_ONCE(!hook, "%s has bad registration", mod);
> > + hook = ERR_PTR(-ENOENT);
> > + }
> > + rcu_read_unlock();
> > +
> > + if (!IS_ERR(hook)) {
> > + err = hook->enable(link->net);
> > + if (err) {
> > + module_put(hook->owner);
> > + hook = ERR_PTR(err);
> > + }
> > + }
> > +
> > + return hook;
>
> The rcu + module_get logic looks correct to me, but you've dropped all Florian's acks.
> What's going on?
>
> We need explicit acks to merge this through bpf-next.

I understood acked-by tags to be a lighter form of reviewed-by tag. So
b/c the patches changed so much I dropped the tag. It sounds like maybe
I misunderstand -- I'll keep it in mind the next time around.

Thanks,
Daniel

2023-07-28 10:07:19

by Florian Westphal

[permalink] [raw]
Subject: Re: [PATCH bpf-next v6 2/5] netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link

Daniel Xu <[email protected]> wrote:
> This commit adds support for enabling IP defrag using pre-existing
> netfilter defrag support. Basically all the flag does is bump a refcnt
> while the link the active. Checks are also added to ensure the prog
> requesting defrag support is run _after_ netfilter defrag hooks.
>
> We also take care to avoid any issues w.r.t. module unloading -- while
> defrag is active on a link, the module is prevented from unloading.

Reviewed-by: Florian Westphal <[email protected]>