2022-05-15 20:46:16

by Yosry Ahmed

[permalink] [raw]
Subject: [RFC PATCH bpf-next v2 2/7] cgroup: bpf: flush bpf stats on rstat flush

When an rstat flush is ongoing for a cgroup, also flush bpf stats by
running any attached rstat flush programs.

Signed-off-by: Yosry Ahmed <[email protected]>
---
include/linux/bpf-rstat.h | 6 ++++++
kernel/bpf/rstat.c | 21 +++++++++++++++++++++
kernel/cgroup/rstat.c | 2 ++
3 files changed, 29 insertions(+)

diff --git a/include/linux/bpf-rstat.h b/include/linux/bpf-rstat.h
index 23cad23b5fc2..55e000fe0f47 100644
--- a/include/linux/bpf-rstat.h
+++ b/include/linux/bpf-rstat.h
@@ -12,6 +12,8 @@
int bpf_rstat_link_attach(const union bpf_attr *attr,
struct bpf_prog *prog);

+void bpf_rstat_flush(struct cgroup *cgrp, int cpu);
+
#else /* defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS) */

static inline int bpf_rstat_link_attach(const union bpf_attr *attr,
@@ -20,6 +22,10 @@ static inline int bpf_rstat_link_attach(const union bpf_attr *attr,
return -ENOTSUPP;
}

+static inline void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
+{
+}
+
#endif /* defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS) */

#endif /* _BPF_RSTAT */
diff --git a/kernel/bpf/rstat.c b/kernel/bpf/rstat.c
index 5f529002d4b9..e96bc080f4b9 100644
--- a/kernel/bpf/rstat.c
+++ b/kernel/bpf/rstat.c
@@ -164,3 +164,24 @@ int bpf_rstat_link_attach(const union bpf_attr *attr,

return bpf_link_settle(&link_primer);
}
+
+void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
+{
+ struct bpf_rstat_flusher *flusher;
+ struct bpf_rstat_flush_ctx ctx = {
+ .cgrp = cgrp,
+ .parent = cgroup_parent(cgrp),
+ .cpu = cpu,
+ };
+
+ rcu_read_lock();
+ migrate_disable();
+ spin_lock(&bpf_rstat_flushers_lock);
+
+ list_for_each_entry(flusher, &bpf_rstat_flushers, list)
+ (void) bpf_prog_run(flusher->prog, &ctx);
+
+ spin_unlock(&bpf_rstat_flushers_lock);
+ migrate_enable();
+ rcu_read_unlock();
+}
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 24b5c2ab5598..0285d496e807 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -2,6 +2,7 @@
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>
+#include <linux/bpf-rstat.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
@@ -168,6 +169,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
struct cgroup_subsys_state *css;

cgroup_base_stat_flush(pos, cpu);
+ bpf_rstat_flush(pos, cpu);

rcu_read_lock();
list_for_each_entry_rcu(css, &pos->rstat_css_list,
--
2.36.0.550.gb090851708-goog



2022-05-17 10:12:41

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v2 2/7] cgroup: bpf: flush bpf stats on rstat flush

On Sun, May 15, 2022 at 02:34:59AM +0000, Yosry Ahmed wrote:
> +
> +void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
> +{
> + struct bpf_rstat_flusher *flusher;
> + struct bpf_rstat_flush_ctx ctx = {
> + .cgrp = cgrp,
> + .parent = cgroup_parent(cgrp),
> + .cpu = cpu,
> + };
> +
> + rcu_read_lock();
> + migrate_disable();
> + spin_lock(&bpf_rstat_flushers_lock);
> +
> + list_for_each_entry(flusher, &bpf_rstat_flushers, list)
> + (void) bpf_prog_run(flusher->prog, &ctx);
> +
> + spin_unlock(&bpf_rstat_flushers_lock);
> + migrate_enable();
> + rcu_read_unlock();
> +}
> diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
> index 24b5c2ab5598..0285d496e807 100644
> --- a/kernel/cgroup/rstat.c
> +++ b/kernel/cgroup/rstat.c
> @@ -2,6 +2,7 @@
> #include "cgroup-internal.h"
>
> #include <linux/sched/cputime.h>
> +#include <linux/bpf-rstat.h>
>
> static DEFINE_SPINLOCK(cgroup_rstat_lock);
> static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
> @@ -168,6 +169,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
> struct cgroup_subsys_state *css;
>
> cgroup_base_stat_flush(pos, cpu);
> + bpf_rstat_flush(pos, cpu);

Please use the following approach instead:

__weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
}

and change above line to:
bpf_rstat_flush(pos, cgroup_parent(pos), cpu);

Then tracing bpf fentry progs will be able to attach to bpf_rstat_flush.
Pretty much the patches 1, 2, 3 are not necessary.
In patch 4 add bpf_cgroup_rstat_updated/flush as two kfuncs instead of stable helpers.

This way patches 1,2,3,4 will become 2 trivial patches and we will be
able to extend the interface between cgroup rstat and bpf whenever we need
without worrying about uapi stability.

We had similar discusison with HID subsystem that plans to use bpf in HID
with the same approach.
See this patch set:
https://lore.kernel.org/bpf/[email protected]/
You'd need patch 1 from it to enable kfuncs for tracing.

Your patch 5 is needed as-is.
Yonghong,
please review it.
Different approach for patch 1-4 won't affect patch 5.
Patches 6 and 7 look good.

With this approach that patch 7 will mostly stay as-is. Instead of:
+SEC("rstat/flush")
+int vmscan_flush(struct bpf_rstat_flush_ctx *ctx)
+{
+ struct vmscan_percpu *pcpu_stat;
+ struct vmscan *total_stat, *parent_stat;
+ struct cgroup *cgrp = ctx->cgrp, *parent = ctx->parent;

it will become

SEC("fentry/bpf_rstat_flush")
int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)

2022-05-18 03:42:16

by Yosry Ahmed

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v2 2/7] cgroup: bpf: flush bpf stats on rstat flush

On Mon, May 16, 2022 at 7:08 PM Alexei Starovoitov
<[email protected]> wrote:
>
> On Sun, May 15, 2022 at 02:34:59AM +0000, Yosry Ahmed wrote:
> > +
> > +void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
> > +{
> > + struct bpf_rstat_flusher *flusher;
> > + struct bpf_rstat_flush_ctx ctx = {
> > + .cgrp = cgrp,
> > + .parent = cgroup_parent(cgrp),
> > + .cpu = cpu,
> > + };
> > +
> > + rcu_read_lock();
> > + migrate_disable();
> > + spin_lock(&bpf_rstat_flushers_lock);
> > +
> > + list_for_each_entry(flusher, &bpf_rstat_flushers, list)
> > + (void) bpf_prog_run(flusher->prog, &ctx);
> > +
> > + spin_unlock(&bpf_rstat_flushers_lock);
> > + migrate_enable();
> > + rcu_read_unlock();
> > +}
> > diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
> > index 24b5c2ab5598..0285d496e807 100644
> > --- a/kernel/cgroup/rstat.c
> > +++ b/kernel/cgroup/rstat.c
> > @@ -2,6 +2,7 @@
> > #include "cgroup-internal.h"
> >
> > #include <linux/sched/cputime.h>
> > +#include <linux/bpf-rstat.h>
> >
> > static DEFINE_SPINLOCK(cgroup_rstat_lock);
> > static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
> > @@ -168,6 +169,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
> > struct cgroup_subsys_state *css;
> >
> > cgroup_base_stat_flush(pos, cpu);
> > + bpf_rstat_flush(pos, cpu);
>
> Please use the following approach instead:
>
> __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
> {
> }
>
> and change above line to:
> bpf_rstat_flush(pos, cgroup_parent(pos), cpu);
>
> Then tracing bpf fentry progs will be able to attach to bpf_rstat_flush.
> Pretty much the patches 1, 2, 3 are not necessary.
> In patch 4 add bpf_cgroup_rstat_updated/flush as two kfuncs instead of stable helpers.
>
> This way patches 1,2,3,4 will become 2 trivial patches and we will be
> able to extend the interface between cgroup rstat and bpf whenever we need
> without worrying about uapi stability.
>
> We had similar discusison with HID subsystem that plans to use bpf in HID
> with the same approach.
> See this patch set:
> https://lore.kernel.org/bpf/[email protected]/
> You'd need patch 1 from it to enable kfuncs for tracing.
>
> Your patch 5 is needed as-is.
> Yonghong,
> please review it.
> Different approach for patch 1-4 won't affect patch 5.
> Patches 6 and 7 look good.
>
> With this approach that patch 7 will mostly stay as-is. Instead of:
> +SEC("rstat/flush")
> +int vmscan_flush(struct bpf_rstat_flush_ctx *ctx)
> +{
> + struct vmscan_percpu *pcpu_stat;
> + struct vmscan *total_stat, *parent_stat;
> + struct cgroup *cgrp = ctx->cgrp, *parent = ctx->parent;
>
> it will become
>
> SEC("fentry/bpf_rstat_flush")
> int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)

Thanks so much for taking the time to look into this.

Indeed, this approach looks cleaner and simpler. I will incorporate
that into a V1 and send it. Thanks!