BPF supports creating high resolution timers using bpf_timer_* helper
functions. Currently, only the BPF_F_TIMER_ABS flag is supported, which
specifies that the timeout should be interpreted as absolute time. It
would also be useful to be able to pin that timer to a core. For
example, if you wanted to make a subset of cores run without timer
interrupts, and only have the timer be invoked on a single core.
This patch adds support for this with a new BPF_F_TIMER_CPU_PIN flag.
When specified, the HRTIMER_MODE_PINNED flag is passed to
hrtimer_start(). A subsequent patch will update selftests to validate.
Signed-off-by: David Vernet <[email protected]>
---
include/uapi/linux/bpf.h | 4 ++++
kernel/bpf/helpers.c | 5 ++++-
tools/include/uapi/linux/bpf.h | 4 ++++
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 70bfa997e896..a7d4a1a69f21 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5096,6 +5096,8 @@ union bpf_attr {
* **BPF_F_TIMER_ABS**
* Start the timer in absolute expire value instead of the
* default relative one.
+ * **BPF_F_TIMER_CPU_PIN**
+ * Timer will be pinned to the CPU of the caller.
*
* Return
* 0 on success.
@@ -7309,9 +7311,11 @@ struct bpf_core_relo {
* Flags to control bpf_timer_start() behaviour.
* - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
* relative to current time.
+ * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
*/
enum {
BPF_F_TIMER_ABS = (1ULL << 0),
+ BPF_F_TIMER_CPU_PIN = (1ULL << 1),
};
/* BPF numbers iterator state */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index dd1c69ee3375..d2840dd5b00d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1272,7 +1272,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
if (in_nmi())
return -EOPNOTSUPP;
- if (flags > BPF_F_TIMER_ABS)
+ if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
return -EINVAL;
__bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer;
@@ -1286,6 +1286,9 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
else
mode = HRTIMER_MODE_REL_SOFT;
+ if (flags & BPF_F_TIMER_CPU_PIN)
+ mode |= HRTIMER_MODE_PINNED;
+
hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 70bfa997e896..a7d4a1a69f21 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5096,6 +5096,8 @@ union bpf_attr {
* **BPF_F_TIMER_ABS**
* Start the timer in absolute expire value instead of the
* default relative one.
+ * **BPF_F_TIMER_CPU_PIN**
+ * Timer will be pinned to the CPU of the caller.
*
* Return
* 0 on success.
@@ -7309,9 +7311,11 @@ struct bpf_core_relo {
* Flags to control bpf_timer_start() behaviour.
* - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
* relative to current time.
+ * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
*/
enum {
BPF_F_TIMER_ABS = (1ULL << 0),
+ BPF_F_TIMER_CPU_PIN = (1ULL << 1),
};
/* BPF numbers iterator state */
--
2.41.0
Now that we support pinning a BPF timer to the current core, we should
test it with some selftests. This patch adds two new testcases to the
timer suite, which verifies that a BPF timer both with and without
BPF_F_TIMER_ABS, can be pinned to the calling core with
BPF_F_TIMER_CPU_PIN.
Signed-off-by: David Vernet <[email protected]>
---
.../testing/selftests/bpf/prog_tests/timer.c | 4 +
tools/testing/selftests/bpf/progs/timer.c | 75 +++++++++++++++++++
2 files changed, 79 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 290c21dbe65a..d8bc838445ec 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -14,6 +14,7 @@ static int timer(struct timer *timer_skel)
ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
prog_fd = bpf_program__fd(timer_skel->progs.test1);
err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -32,6 +33,9 @@ static int timer(struct timer *timer_skel)
/* check that timer_cb3() was executed twice */
ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
+ /* check that timer_cb_pinned() was executed twice */
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
+
/* check that there were no errors in timer execution */
ASSERT_EQ(timer_skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 9a16d95213e1..0112b9c038b4 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -53,12 +53,28 @@ struct {
__type(value, struct elem);
} abs_timer SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} soft_timer_pinned SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} abs_timer_pinned SEC(".maps");
+
__u64 bss_data;
__u64 abs_data;
__u64 err;
__u64 ok;
__u64 callback_check = 52;
__u64 callback2_check = 52;
+__u64 pinned_callback_check;
+__s32 pinned_cpu;
#define ARRAY 1
#define HTAB 2
@@ -329,3 +345,62 @@ int BPF_PROG2(test3, int, a)
return 0;
}
+
+/* callback for pinned timer */
+static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer)
+{
+ __s32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu != pinned_cpu)
+ err |= 16384;
+
+ pinned_callback_check++;
+ return 0;
+}
+
+static void test_pinned_timer(bool soft)
+{
+ int key = 0;
+ void *map;
+ struct bpf_timer *timer;
+ __u64 flags = BPF_F_TIMER_CPU_PIN;
+ __u64 start_time;
+
+ if (soft) {
+ map = &soft_timer_pinned;
+ start_time = 0;
+ } else {
+ map = &abs_timer_pinned;
+ start_time = bpf_ktime_get_boot_ns();
+ flags |= BPF_F_TIMER_ABS;
+ }
+
+ timer = bpf_map_lookup_elem(map, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0)
+ err |= 4096;
+ bpf_timer_set_callback(timer, timer_cb_pinned);
+ pinned_cpu = bpf_get_smp_processor_id();
+ bpf_timer_start(timer, start_time + 1000, flags);
+ } else {
+ err |= 8192;
+ }
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test4, int, a)
+{
+ bpf_printk("test4");
+ test_pinned_timer(true);
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG2(test5, int, a)
+{
+ bpf_printk("test5");
+ test_pinned_timer(false);
+
+ return 0;
+}
--
2.41.0
> On Oct 2, 2023, at 4:47 PM, David Vernet <[email protected]> wrote:
>
> BPF supports creating high resolution timers using bpf_timer_* helper
> functions. Currently, only the BPF_F_TIMER_ABS flag is supported, which
> specifies that the timeout should be interpreted as absolute time. It
> would also be useful to be able to pin that timer to a core. For
> example, if you wanted to make a subset of cores run without timer
> interrupts, and only have the timer be invoked on a single core.
>
> This patch adds support for this with a new BPF_F_TIMER_CPU_PIN flag.
> When specified, the HRTIMER_MODE_PINNED flag is passed to
> hrtimer_start(). A subsequent patch will update selftests to validate.
>
> Signed-off-by: David Vernet <[email protected]>
Acked-by: Song Liu <[email protected]>
> On Oct 2, 2023, at 4:47 PM, David Vernet <[email protected]> wrote:
>
> Now that we support pinning a BPF timer to the current core, we should
> test it with some selftests. This patch adds two new testcases to the
> timer suite, which verifies that a BPF timer both with and without
> BPF_F_TIMER_ABS, can be pinned to the calling core with
> BPF_F_TIMER_CPU_PIN.
>
> Signed-off-by: David Vernet <[email protected]>
Acked-by: Song Liu <[email protected]>
With one nit/question below.
> ---
> .../testing/selftests/bpf/prog_tests/timer.c | 4 +
> tools/testing/selftests/bpf/progs/timer.c | 75 +++++++++++++++++++
> 2 files changed, 79 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
> index 290c21dbe65a..d8bc838445ec 100644
> --- a/tools/testing/selftests/bpf/prog_tests/timer.c
> +++ b/tools/testing/selftests/bpf/prog_tests/timer.c
> @@ -14,6 +14,7 @@ static int timer(struct timer *timer_skel)
>
> ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
> ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
> + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
>
> prog_fd = bpf_program__fd(timer_skel->progs.test1);
> err = bpf_prog_test_run_opts(prog_fd, &topts);
> @@ -32,6 +33,9 @@ static int timer(struct timer *timer_skel)
> /* check that timer_cb3() was executed twice */
> ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
>
> + /* check that timer_cb_pinned() was executed twice */
> + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
> +
> /* check that there were no errors in timer execution */
> ASSERT_EQ(timer_skel->bss->err, 0, "err");
>
> diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
> index 9a16d95213e1..0112b9c038b4 100644
> --- a/tools/testing/selftests/bpf/progs/timer.c
> +++ b/tools/testing/selftests/bpf/progs/timer.c
> @@ -53,12 +53,28 @@ struct {
> __type(value, struct elem);
> } abs_timer SEC(".maps");
>
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 1);
> + __type(key, int);
> + __type(value, struct elem);
> +} soft_timer_pinned SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 1);
> + __type(key, int);
> + __type(value, struct elem);
> +} abs_timer_pinned SEC(".maps");
nit: I think we can also do something like the following, but I am not
sure whether this style is not recommended.
diff --git i/tools/testing/selftests/bpf/progs/timer.c w/tools/testing/selftests/bpf/progs/timer.c
index 9a16d95213e1..638eeebcd6c9 100644
--- i/tools/testing/selftests/bpf/progs/timer.c
+++ w/tools/testing/selftests/bpf/progs/timer.c
@@ -51,7 +51,7 @@ struct {
__uint(max_entries, 1);
__type(key, int);
__type(value, struct elem);
-} abs_timer SEC(".maps");
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
__u64 bss_data;
__u64 abs_data;
[...]
On Tue, Oct 03, 2023 at 06:15:03PM +0000, Song Liu wrote:
>
>
> > On Oct 2, 2023, at 4:47 PM, David Vernet <[email protected]> wrote:
> >
> > Now that we support pinning a BPF timer to the current core, we should
> > test it with some selftests. This patch adds two new testcases to the
> > timer suite, which verifies that a BPF timer both with and without
> > BPF_F_TIMER_ABS, can be pinned to the calling core with
> > BPF_F_TIMER_CPU_PIN.
> >
> > Signed-off-by: David Vernet <[email protected]>
>
> Acked-by: Song Liu <[email protected]>
>
> With one nit/question below.
>
> > ---
> > .../testing/selftests/bpf/prog_tests/timer.c | 4 +
> > tools/testing/selftests/bpf/progs/timer.c | 75 +++++++++++++++++++
> > 2 files changed, 79 insertions(+)
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
> > index 290c21dbe65a..d8bc838445ec 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/timer.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/timer.c
> > @@ -14,6 +14,7 @@ static int timer(struct timer *timer_skel)
> >
> > ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
> > ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
> > + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
> >
> > prog_fd = bpf_program__fd(timer_skel->progs.test1);
> > err = bpf_prog_test_run_opts(prog_fd, &topts);
> > @@ -32,6 +33,9 @@ static int timer(struct timer *timer_skel)
> > /* check that timer_cb3() was executed twice */
> > ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
> >
> > + /* check that timer_cb_pinned() was executed twice */
> > + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
> > +
> > /* check that there were no errors in timer execution */
> > ASSERT_EQ(timer_skel->bss->err, 0, "err");
> >
> > diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
> > index 9a16d95213e1..0112b9c038b4 100644
> > --- a/tools/testing/selftests/bpf/progs/timer.c
> > +++ b/tools/testing/selftests/bpf/progs/timer.c
> > @@ -53,12 +53,28 @@ struct {
> > __type(value, struct elem);
> > } abs_timer SEC(".maps");
> >
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_ARRAY);
> > + __uint(max_entries, 1);
> > + __type(key, int);
> > + __type(value, struct elem);
> > +} soft_timer_pinned SEC(".maps");
> > +
> > +struct {
> > + __uint(type, BPF_MAP_TYPE_ARRAY);
> > + __uint(max_entries, 1);
> > + __type(key, int);
> > + __type(value, struct elem);
> > +} abs_timer_pinned SEC(".maps");
>
> nit: I think we can also do something like the following, but I am not
> sure whether this style is not recommended.
>
> diff --git i/tools/testing/selftests/bpf/progs/timer.c w/tools/testing/selftests/bpf/progs/timer.c
> index 9a16d95213e1..638eeebcd6c9 100644
> --- i/tools/testing/selftests/bpf/progs/timer.c
> +++ w/tools/testing/selftests/bpf/progs/timer.c
> @@ -51,7 +51,7 @@ struct {
> __uint(max_entries, 1);
> __type(key, int);
> __type(value, struct elem);
> -} abs_timer SEC(".maps");
> +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
This looks like a nice readability improvement / cleanup to me. If
nobody objects, I'd say let's apply it.
Thanks,
David