perf_output_wakeup() already marks the perf event fd available for polling.
Add code to trigger IO signals with FASYNC too.
Signed-off-by: Kyle Huey <[email protected]>
---
include/linux/perf_event.h | 8 ++++++++
kernel/events/core.c | 8 --------
kernel/events/ring_buffer.c | 3 +++
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8bd5bb6610c..c077968d7e52 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1914,4 +1914,12 @@ static inline void perf_lopwr_cb(bool mode)
}
#endif
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+ /* only the parent has fasync state */
+ if (event->parent)
+ event = event->parent;
+ return &event->fasync;
+}
+
#endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a329bec42c4d..36b5fbdf8e6e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6684,14 +6684,6 @@ static const struct file_operations perf_fops = {
* to user-space before waking everybody up.
*/
-static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
-{
- /* only the parent has fasync state */
- if (event->parent)
- event = event->parent;
- return &event->fasync;
-}
-
void perf_event_wakeup(struct perf_event *event)
{
ring_buffer_wakeup(event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 60ed43d1c29e..033e54bb5c62 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -22,6 +22,9 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
atomic_set(&handle->rb->poll, EPOLLIN);
handle->event->pending_wakeup = 1;
+ if (*perf_event_fasync(handle->event) && !handle->event->pending_kill)
+ handle->event->pending_kill = POLL_IN;
+
irq_work_queue(&handle->event->pending_irq);
}
--
2.34.1
The test uses PERF_RECORD_SWITCH records to fill the ring buffer and
trigger the watermark wakeup, which in turn should trigger an IO
signal.
Signed-off-by: Kyle Huey <[email protected]>
---
tools/perf/tests/Build | 1 +
tools/perf/tests/builtin-test.c | 1 +
tools/perf/tests/tests.h | 1 +
tools/perf/tests/watermark_signal.c | 123 ++++++++++++++++++++++++++++
4 files changed, 126 insertions(+)
create mode 100644 tools/perf/tests/watermark_signal.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 53ba9c3e20e0..de43eb60b280 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -67,6 +67,7 @@ perf-y += sigtrap.o
perf-y += event_groups.o
perf-y += symbols.o
perf-y += util.o
+perf-y += watermark_signal.o
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4a5973f9bb9b..715c01a2172a 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -124,6 +124,7 @@ static struct test_suite *generic_tests[] = {
&suite__event_groups,
&suite__symbols,
&suite__util,
+ &suite__watermark_signal,
NULL,
};
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index dad3d7414142..7ef4e0d0a77b 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -146,6 +146,7 @@ DECLARE_SUITE(sigtrap);
DECLARE_SUITE(event_groups);
DECLARE_SUITE(symbols);
DECLARE_SUITE(util);
+DECLARE_SUITE(watermark_signal);
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the
diff --git a/tools/perf/tests/watermark_signal.c b/tools/perf/tests/watermark_signal.c
new file mode 100644
index 000000000000..ae4abedc4b7c
--- /dev/null
+++ b/tools/perf/tests/watermark_signal.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "event.h"
+#include "../perf-sys.h"
+#include "cloexec.h"
+#include <internal/lib.h> // page_size
+
+static int sigio_count;
+
+static void handle_sigio(int sig __always_unused)
+{
+ ++sigio_count;
+}
+
+static void do_child(void)
+{
+ for (int i = 0; i < 20; ++i)
+ sleep(1);
+
+ exit(0);
+}
+
+static int test__watermark_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_event_attr attr;
+ struct perf_event_mmap_page *p = NULL;
+ sighandler_t previous_sigio = SIG_ERR;
+ pid_t child = -1;
+ int fd = -1;
+ int ret = TEST_FAIL;
+
+ previous_sigio = signal(SIGIO, handle_sigio);
+ if (previous_sigio == SIG_ERR) {
+ pr_debug("failed setting SIGIO handler\n");
+ goto cleanup;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_DUMMY;
+ attr.sample_period = 1;
+ attr.disabled = 1;
+ attr.watermark = 1;
+ attr.context_switch = 1;
+ attr.wakeup_watermark = 1;
+
+ child = fork();
+ if (child == 0)
+ do_child();
+ else if (child < 0) {
+ pr_debug("failed fork() %d\n", errno);
+ goto cleanup;
+ }
+
+ fd = sys_perf_event_open(&attr, child, -1, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("failed opening event %llx\n", attr.config);
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETFL, FASYNC)) {
+ pr_debug("failed F_SETFL FASYNC %d\n", errno);
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETOWN, getpid())) {
+ pr_debug("failed F_SETOWN getpid() %d\n", errno);
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETSIG, SIGIO)) {
+ pr_debug("failed F_SETSIG SIGIO %d\n", errno);
+ goto cleanup;
+ }
+
+ p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == NULL) {
+ pr_debug("failed to mmap\n");
+ goto cleanup;
+ }
+
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ pr_debug("failed PERF_EVENT_IOC_ENABLE %d\n", errno);
+ goto cleanup;
+ }
+
+ sleep(30);
+
+ ret = (sigio_count == 1) ? TEST_OK : TEST_FAIL;
+
+cleanup:
+ if (p != NULL)
+ munmap(p, 2 * page_size);
+
+ if (fd >= 0)
+ close(fd);
+
+ if (child > 0) {
+ kill(child, SIGTERM);
+ waitpid(child, NULL, 0);
+ }
+
+ if (previous_sigio != SIG_ERR)
+ signal(SIGIO, previous_sigio);
+
+ return ret;
+}
+
+DEFINE_SUITE("Watermark signal handler", watermark_signal);
--
2.34.1
On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
>
> The test uses PERF_RECORD_SWITCH records to fill the ring buffer and
> trigger the watermark wakeup, which in turn should trigger an IO
> signal.
>
> Signed-off-by: Kyle Huey <[email protected]>
> ---
> tools/perf/tests/Build | 1 +
> tools/perf/tests/builtin-test.c | 1 +
> tools/perf/tests/tests.h | 1 +
> tools/perf/tests/watermark_signal.c | 123 ++++++++++++++++++++++++++++
> 4 files changed, 126 insertions(+)
> create mode 100644 tools/perf/tests/watermark_signal.c
>
> diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> index 53ba9c3e20e0..de43eb60b280 100644
> --- a/tools/perf/tests/Build
> +++ b/tools/perf/tests/Build
> @@ -67,6 +67,7 @@ perf-y += sigtrap.o
> perf-y += event_groups.o
> perf-y += symbols.o
> perf-y += util.o
> +perf-y += watermark_signal.o
>
> ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
> perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> index 4a5973f9bb9b..715c01a2172a 100644
> --- a/tools/perf/tests/builtin-test.c
> +++ b/tools/perf/tests/builtin-test.c
> @@ -124,6 +124,7 @@ static struct test_suite *generic_tests[] = {
> &suite__event_groups,
> &suite__symbols,
> &suite__util,
> + &suite__watermark_signal,
> NULL,
> };
>
> diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> index dad3d7414142..7ef4e0d0a77b 100644
> --- a/tools/perf/tests/tests.h
> +++ b/tools/perf/tests/tests.h
> @@ -146,6 +146,7 @@ DECLARE_SUITE(sigtrap);
> DECLARE_SUITE(event_groups);
> DECLARE_SUITE(symbols);
> DECLARE_SUITE(util);
> +DECLARE_SUITE(watermark_signal);
>
> /*
> * PowerPC and S390 do not support creation of instruction breakpoints using the
> diff --git a/tools/perf/tests/watermark_signal.c b/tools/perf/tests/watermark_signal.c
> new file mode 100644
> index 000000000000..ae4abedc4b7c
> --- /dev/null
> +++ b/tools/perf/tests/watermark_signal.c
> @@ -0,0 +1,123 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stddef.h>
> +#include <signal.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <sys/wait.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +
> +#include "tests.h"
> +#include "debug.h"
> +#include "event.h"
> +#include "../perf-sys.h"
> +#include "cloexec.h"
> +#include <internal/lib.h> // page_size
> +
> +static int sigio_count;
> +
> +static void handle_sigio(int sig __always_unused)
> +{
> + ++sigio_count;
> +}
> +
> +static void do_child(void)
> +{
> + for (int i = 0; i < 20; ++i)
> + sleep(1);
> +
> + exit(0);
> +}
> +
> +static int test__watermark_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
> +{
> + struct perf_event_attr attr;
> + struct perf_event_mmap_page *p = NULL;
> + sighandler_t previous_sigio = SIG_ERR;
> + pid_t child = -1;
> + int fd = -1;
> + int ret = TEST_FAIL;
> +
> + previous_sigio = signal(SIGIO, handle_sigio);
> + if (previous_sigio == SIG_ERR) {
> + pr_debug("failed setting SIGIO handler\n");
> + goto cleanup;
> + }
> +
> + memset(&attr, 0, sizeof(attr));
> + attr.size = sizeof(attr);
> + attr.type = PERF_TYPE_SOFTWARE;
> + attr.config = PERF_COUNT_SW_DUMMY;
> + attr.sample_period = 1;
> + attr.disabled = 1;
> + attr.watermark = 1;
> + attr.context_switch = 1;
> + attr.wakeup_watermark = 1;
> +
> + child = fork();
> + if (child == 0)
> + do_child();
> + else if (child < 0) {
> + pr_debug("failed fork() %d\n", errno);
> + goto cleanup;
> + }
> +
> + fd = sys_perf_event_open(&attr, child, -1, -1,
> + perf_event_open_cloexec_flag());
> + if (fd < 0) {
> + pr_debug("failed opening event %llx\n", attr.config);
> + goto cleanup;
> + }
> +
> + if (fcntl(fd, F_SETFL, FASYNC)) {
> + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> + goto cleanup;
> + }
Thanks for the work! The perf tool and perf test should run on older
kernels ideally without failure. I'm assuming this would fail on an
older kernel. Could we make the return value skip in that case?
> +
> + if (fcntl(fd, F_SETOWN, getpid())) {
> + pr_debug("failed F_SETOWN getpid() %d\n", errno);
> + goto cleanup;
> + }
> +
> + if (fcntl(fd, F_SETSIG, SIGIO)) {
> + pr_debug("failed F_SETSIG SIGIO %d\n", errno);
> + goto cleanup;
> + }
> +
> + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> + if (p == NULL) {
> + pr_debug("failed to mmap\n");
> + goto cleanup;
> + }
> +
> + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
> + pr_debug("failed PERF_EVENT_IOC_ENABLE %d\n", errno);
> + goto cleanup;
> + }
> +
> + sleep(30);
This is kind of a painful wait, could it be less? No sleeps would be best :-)
Thanks,
Ian
> +
> + ret = (sigio_count == 1) ? TEST_OK : TEST_FAIL;
> +
> +cleanup:
> + if (p != NULL)
> + munmap(p, 2 * page_size);
> +
> + if (fd >= 0)
> + close(fd);
> +
> + if (child > 0) {
> + kill(child, SIGTERM);
> + waitpid(child, NULL, 0);
> + }
> +
> + if (previous_sigio != SIG_ERR)
> + signal(SIGIO, previous_sigio);
> +
> + return ret;
> +}
> +
> +DEFINE_SUITE("Watermark signal handler", watermark_signal);
> --
> 2.34.1
>
On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
>
> On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> >
> > The test uses PERF_RECORD_SWITCH records to fill the ring buffer and
> > trigger the watermark wakeup, which in turn should trigger an IO
> > signal.
> >
> > Signed-off-by: Kyle Huey <[email protected]>
> > ---
> > tools/perf/tests/Build | 1 +
> > tools/perf/tests/builtin-test.c | 1 +
> > tools/perf/tests/tests.h | 1 +
> > tools/perf/tests/watermark_signal.c | 123 ++++++++++++++++++++++++++++
> > 4 files changed, 126 insertions(+)
> > create mode 100644 tools/perf/tests/watermark_signal.c
> >
> > diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> > index 53ba9c3e20e0..de43eb60b280 100644
> > --- a/tools/perf/tests/Build
> > +++ b/tools/perf/tests/Build
> > @@ -67,6 +67,7 @@ perf-y += sigtrap.o
> > perf-y += event_groups.o
> > perf-y += symbols.o
> > perf-y += util.o
> > +perf-y += watermark_signal.o
> >
> > ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
> > perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
> > diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> > index 4a5973f9bb9b..715c01a2172a 100644
> > --- a/tools/perf/tests/builtin-test.c
> > +++ b/tools/perf/tests/builtin-test.c
> > @@ -124,6 +124,7 @@ static struct test_suite *generic_tests[] = {
> > &suite__event_groups,
> > &suite__symbols,
> > &suite__util,
> > + &suite__watermark_signal,
> > NULL,
> > };
> >
> > diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> > index dad3d7414142..7ef4e0d0a77b 100644
> > --- a/tools/perf/tests/tests.h
> > +++ b/tools/perf/tests/tests.h
> > @@ -146,6 +146,7 @@ DECLARE_SUITE(sigtrap);
> > DECLARE_SUITE(event_groups);
> > DECLARE_SUITE(symbols);
> > DECLARE_SUITE(util);
> > +DECLARE_SUITE(watermark_signal);
> >
> > /*
> > * PowerPC and S390 do not support creation of instruction breakpoints using the
> > diff --git a/tools/perf/tests/watermark_signal.c b/tools/perf/tests/watermark_signal.c
> > new file mode 100644
> > index 000000000000..ae4abedc4b7c
> > --- /dev/null
> > +++ b/tools/perf/tests/watermark_signal.c
> > @@ -0,0 +1,123 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <stddef.h>
> > +#include <signal.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/mman.h>
> > +#include <sys/wait.h>
> > +#include <unistd.h>
> > +#include <errno.h>
> > +#include <fcntl.h>
> > +
> > +#include "tests.h"
> > +#include "debug.h"
> > +#include "event.h"
> > +#include "../perf-sys.h"
> > +#include "cloexec.h"
> > +#include <internal/lib.h> // page_size
> > +
> > +static int sigio_count;
> > +
> > +static void handle_sigio(int sig __always_unused)
> > +{
> > + ++sigio_count;
> > +}
> > +
> > +static void do_child(void)
> > +{
> > + for (int i = 0; i < 20; ++i)
> > + sleep(1);
> > +
> > + exit(0);
> > +}
> > +
> > +static int test__watermark_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
> > +{
> > + struct perf_event_attr attr;
> > + struct perf_event_mmap_page *p = NULL;
> > + sighandler_t previous_sigio = SIG_ERR;
> > + pid_t child = -1;
> > + int fd = -1;
> > + int ret = TEST_FAIL;
> > +
> > + previous_sigio = signal(SIGIO, handle_sigio);
> > + if (previous_sigio == SIG_ERR) {
> > + pr_debug("failed setting SIGIO handler\n");
> > + goto cleanup;
> > + }
> > +
> > + memset(&attr, 0, sizeof(attr));
> > + attr.size = sizeof(attr);
> > + attr.type = PERF_TYPE_SOFTWARE;
> > + attr.config = PERF_COUNT_SW_DUMMY;
> > + attr.sample_period = 1;
> > + attr.disabled = 1;
> > + attr.watermark = 1;
> > + attr.context_switch = 1;
> > + attr.wakeup_watermark = 1;
> > +
> > + child = fork();
> > + if (child == 0)
> > + do_child();
> > + else if (child < 0) {
> > + pr_debug("failed fork() %d\n", errno);
> > + goto cleanup;
> > + }
> > +
> > + fd = sys_perf_event_open(&attr, child, -1, -1,
> > + perf_event_open_cloexec_flag());
> > + if (fd < 0) {
> > + pr_debug("failed opening event %llx\n", attr.config);
> > + goto cleanup;
> > + }
> > +
> > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > + goto cleanup;
> > + }
>
> Thanks for the work! The perf tool and perf test should run on older
> kernels ideally without failure. I'm assuming this would fail on an
> older kernel. Could we make the return value skip in that case?
Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
yes. It's not possible to distinguish between an older kernel and a
kernel where this fix broke (at least not without hardcoding in an
expected good kernel version, which seems undesirable), so that would
mean the test would always return ok or skip, not ok or fail. Is that
ok?
> > +
> > + if (fcntl(fd, F_SETOWN, getpid())) {
> > + pr_debug("failed F_SETOWN getpid() %d\n", errno);
> > + goto cleanup;
> > + }
> > +
> > + if (fcntl(fd, F_SETSIG, SIGIO)) {
> > + pr_debug("failed F_SETSIG SIGIO %d\n", errno);
> > + goto cleanup;
> > + }
> > +
> > + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> > + if (p == NULL) {
> > + pr_debug("failed to mmap\n");
> > + goto cleanup;
> > + }
> > +
> > + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
> > + pr_debug("failed PERF_EVENT_IOC_ENABLE %d\n", errno);
> > + goto cleanup;
> > + }
> > +
> > + sleep(30);
>
> This is kind of a painful wait, could it be less? No sleeps would be best :-)
We could synchronize with the child using SIGSTOP instead of sleeping
here. Not sure about getting rid of the tiny sleeps in the child. I
have observed that sched_yield() doesn't reliably trigger context
switches (which isn't surprising). I'll experiment with blocking on a
pipe or something.
- Kyle
> Thanks,
> Ian
>
> > +
> > + ret = (sigio_count == 1) ? TEST_OK : TEST_FAIL;
> > +
> > +cleanup:
> > + if (p != NULL)
> > + munmap(p, 2 * page_size);
> > +
> > + if (fd >= 0)
> > + close(fd);
> > +
> > + if (child > 0) {
> > + kill(child, SIGTERM);
> > + waitpid(child, NULL, 0);
> > + }
> > +
> > + if (previous_sigio != SIG_ERR)
> > + signal(SIGIO, previous_sigio);
> > +
> > + return ret;
> > +}
> > +
> > +DEFINE_SUITE("Watermark signal handler", watermark_signal);
> > --
> > 2.34.1
> >
On Thu, Feb 22, 2024 at 9:55 AM Kyle Huey <[email protected]> wrote:
>
> On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
> >
> > On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> > >
> > > The test uses PERF_RECORD_SWITCH records to fill the ring buffer and
> > > trigger the watermark wakeup, which in turn should trigger an IO
> > > signal.
> > >
> > > Signed-off-by: Kyle Huey <[email protected]>
> > > ---
> > > tools/perf/tests/Build | 1 +
> > > tools/perf/tests/builtin-test.c | 1 +
> > > tools/perf/tests/tests.h | 1 +
> > > tools/perf/tests/watermark_signal.c | 123 ++++++++++++++++++++++++++++
> > > 4 files changed, 126 insertions(+)
> > > create mode 100644 tools/perf/tests/watermark_signal.c
> > >
> > > diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> > > index 53ba9c3e20e0..de43eb60b280 100644
> > > --- a/tools/perf/tests/Build
> > > +++ b/tools/perf/tests/Build
> > > @@ -67,6 +67,7 @@ perf-y += sigtrap.o
> > > perf-y += event_groups.o
> > > perf-y += symbols.o
> > > perf-y += util.o
> > > +perf-y += watermark_signal.o
> > >
> > > ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
> > > perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
> > > diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> > > index 4a5973f9bb9b..715c01a2172a 100644
> > > --- a/tools/perf/tests/builtin-test.c
> > > +++ b/tools/perf/tests/builtin-test.c
> > > @@ -124,6 +124,7 @@ static struct test_suite *generic_tests[] = {
> > > &suite__event_groups,
> > > &suite__symbols,
> > > &suite__util,
> > > + &suite__watermark_signal,
> > > NULL,
> > > };
> > >
> > > diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> > > index dad3d7414142..7ef4e0d0a77b 100644
> > > --- a/tools/perf/tests/tests.h
> > > +++ b/tools/perf/tests/tests.h
> > > @@ -146,6 +146,7 @@ DECLARE_SUITE(sigtrap);
> > > DECLARE_SUITE(event_groups);
> > > DECLARE_SUITE(symbols);
> > > DECLARE_SUITE(util);
> > > +DECLARE_SUITE(watermark_signal);
> > >
> > > /*
> > > * PowerPC and S390 do not support creation of instruction breakpoints using the
> > > diff --git a/tools/perf/tests/watermark_signal.c b/tools/perf/tests/watermark_signal.c
> > > new file mode 100644
> > > index 000000000000..ae4abedc4b7c
> > > --- /dev/null
> > > +++ b/tools/perf/tests/watermark_signal.c
> > > @@ -0,0 +1,123 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#include <stddef.h>
> > > +#include <signal.h>
> > > +#include <stdlib.h>
> > > +#include <string.h>
> > > +#include <sys/ioctl.h>
> > > +#include <sys/mman.h>
> > > +#include <sys/wait.h>
> > > +#include <unistd.h>
> > > +#include <errno.h>
> > > +#include <fcntl.h>
> > > +
> > > +#include "tests.h"
> > > +#include "debug.h"
> > > +#include "event.h"
> > > +#include "../perf-sys.h"
> > > +#include "cloexec.h"
> > > +#include <internal/lib.h> // page_size
> > > +
> > > +static int sigio_count;
> > > +
> > > +static void handle_sigio(int sig __always_unused)
> > > +{
> > > + ++sigio_count;
> > > +}
> > > +
> > > +static void do_child(void)
> > > +{
> > > + for (int i = 0; i < 20; ++i)
> > > + sleep(1);
> > > +
> > > + exit(0);
> > > +}
> > > +
> > > +static int test__watermark_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
> > > +{
> > > + struct perf_event_attr attr;
> > > + struct perf_event_mmap_page *p = NULL;
> > > + sighandler_t previous_sigio = SIG_ERR;
> > > + pid_t child = -1;
> > > + int fd = -1;
> > > + int ret = TEST_FAIL;
> > > +
> > > + previous_sigio = signal(SIGIO, handle_sigio);
> > > + if (previous_sigio == SIG_ERR) {
> > > + pr_debug("failed setting SIGIO handler\n");
> > > + goto cleanup;
> > > + }
> > > +
> > > + memset(&attr, 0, sizeof(attr));
> > > + attr.size = sizeof(attr);
> > > + attr.type = PERF_TYPE_SOFTWARE;
> > > + attr.config = PERF_COUNT_SW_DUMMY;
> > > + attr.sample_period = 1;
> > > + attr.disabled = 1;
> > > + attr.watermark = 1;
> > > + attr.context_switch = 1;
> > > + attr.wakeup_watermark = 1;
> > > +
> > > + child = fork();
> > > + if (child == 0)
> > > + do_child();
> > > + else if (child < 0) {
> > > + pr_debug("failed fork() %d\n", errno);
> > > + goto cleanup;
> > > + }
> > > +
> > > + fd = sys_perf_event_open(&attr, child, -1, -1,
> > > + perf_event_open_cloexec_flag());
> > > + if (fd < 0) {
> > > + pr_debug("failed opening event %llx\n", attr.config);
> > > + goto cleanup;
> > > + }
> > > +
> > > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > > + goto cleanup;
> > > + }
> >
> > Thanks for the work! The perf tool and perf test should run on older
> > kernels ideally without failure. I'm assuming this would fail on an
> > older kernel. Could we make the return value skip in that case?
>
> Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
> yes. It's not possible to distinguish between an older kernel and a
> kernel where this fix broke (at least not without hardcoding in an
> expected good kernel version, which seems undesirable), so that would
> mean the test would always return ok or skip, not ok or fail. Is that
> ok?
Not great :-) Is there any hint from the errno? I was hoping for
EOPNOTSUPP but I'm guessing it will be EINVAL, in which case probably
something like:
/* Older kernels lack support. */
ret = (errno == EINVAL) ? TEST_SKIP : TEST_FAIL;
> > > +
> > > + if (fcntl(fd, F_SETOWN, getpid())) {
> > > + pr_debug("failed F_SETOWN getpid() %d\n", errno);
> > > + goto cleanup;
> > > + }
> > > +
> > > + if (fcntl(fd, F_SETSIG, SIGIO)) {
> > > + pr_debug("failed F_SETSIG SIGIO %d\n", errno);
> > > + goto cleanup;
> > > + }
> > > +
> > > + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> > > + if (p == NULL) {
> > > + pr_debug("failed to mmap\n");
> > > + goto cleanup;
> > > + }
> > > +
> > > + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
> > > + pr_debug("failed PERF_EVENT_IOC_ENABLE %d\n", errno);
> > > + goto cleanup;
> > > + }
> > > +
> > > + sleep(30);
> >
> > This is kind of a painful wait, could it be less? No sleeps would be best :-)
>
> We could synchronize with the child using SIGSTOP instead of sleeping
> here. Not sure about getting rid of the tiny sleeps in the child. I
> have observed that sched_yield() doesn't reliably trigger context
> switches (which isn't surprising). I'll experiment with blocking on a
> pipe or something.
Great, thanks!
Ian
> - Kyle
>
> > Thanks,
> > Ian
> >
> > > +
> > > + ret = (sigio_count == 1) ? TEST_OK : TEST_FAIL;
> > > +
> > > +cleanup:
> > > + if (p != NULL)
> > > + munmap(p, 2 * page_size);
> > > +
> > > + if (fd >= 0)
> > > + close(fd);
> > > +
> > > + if (child > 0) {
> > > + kill(child, SIGTERM);
> > > + waitpid(child, NULL, 0);
> > > + }
> > > +
> > > + if (previous_sigio != SIG_ERR)
> > > + signal(SIGIO, previous_sigio);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +DEFINE_SUITE("Watermark signal handler", watermark_signal);
> > > --
> > > 2.34.1
> > >
On Thu, Feb 22, 2024 at 10:33 AM Ian Rogers <[email protected]> wrote:
>
> On Thu, Feb 22, 2024 at 9:55 AM Kyle Huey <[email protected]> wrote:
> >
> > On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
> > >
> > > On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> > > >
> > > > The test uses PERF_RECORD_SWITCH records to fill the ring buffer and
> > > > trigger the watermark wakeup, which in turn should trigger an IO
> > > > signal.
> > > >
> > > > Signed-off-by: Kyle Huey <[email protected]>
> > > > ---
> > > > tools/perf/tests/Build | 1 +
> > > > tools/perf/tests/builtin-test.c | 1 +
> > > > tools/perf/tests/tests.h | 1 +
> > > > tools/perf/tests/watermark_signal.c | 123 ++++++++++++++++++++++++++++
> > > > 4 files changed, 126 insertions(+)
> > > > create mode 100644 tools/perf/tests/watermark_signal.c
> > > >
> > > > diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> > > > index 53ba9c3e20e0..de43eb60b280 100644
> > > > --- a/tools/perf/tests/Build
> > > > +++ b/tools/perf/tests/Build
> > > > @@ -67,6 +67,7 @@ perf-y += sigtrap.o
> > > > perf-y += event_groups.o
> > > > perf-y += symbols.o
> > > > perf-y += util.o
> > > > +perf-y += watermark_signal.o
> > > >
> > > > ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
> > > > perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
> > > > diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> > > > index 4a5973f9bb9b..715c01a2172a 100644
> > > > --- a/tools/perf/tests/builtin-test.c
> > > > +++ b/tools/perf/tests/builtin-test.c
> > > > @@ -124,6 +124,7 @@ static struct test_suite *generic_tests[] = {
> > > > &suite__event_groups,
> > > > &suite__symbols,
> > > > &suite__util,
> > > > + &suite__watermark_signal,
> > > > NULL,
> > > > };
> > > >
> > > > diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> > > > index dad3d7414142..7ef4e0d0a77b 100644
> > > > --- a/tools/perf/tests/tests.h
> > > > +++ b/tools/perf/tests/tests.h
> > > > @@ -146,6 +146,7 @@ DECLARE_SUITE(sigtrap);
> > > > DECLARE_SUITE(event_groups);
> > > > DECLARE_SUITE(symbols);
> > > > DECLARE_SUITE(util);
> > > > +DECLARE_SUITE(watermark_signal);
> > > >
> > > > /*
> > > > * PowerPC and S390 do not support creation of instruction breakpoints using the
> > > > diff --git a/tools/perf/tests/watermark_signal.c b/tools/perf/tests/watermark_signal.c
> > > > new file mode 100644
> > > > index 000000000000..ae4abedc4b7c
> > > > --- /dev/null
> > > > +++ b/tools/perf/tests/watermark_signal.c
> > > > @@ -0,0 +1,123 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +#include <stddef.h>
> > > > +#include <signal.h>
> > > > +#include <stdlib.h>
> > > > +#include <string.h>
> > > > +#include <sys/ioctl.h>
> > > > +#include <sys/mman.h>
> > > > +#include <sys/wait.h>
> > > > +#include <unistd.h>
> > > > +#include <errno.h>
> > > > +#include <fcntl.h>
> > > > +
> > > > +#include "tests.h"
> > > > +#include "debug.h"
> > > > +#include "event.h"
> > > > +#include "../perf-sys.h"
> > > > +#include "cloexec.h"
> > > > +#include <internal/lib.h> // page_size
> > > > +
> > > > +static int sigio_count;
> > > > +
> > > > +static void handle_sigio(int sig __always_unused)
> > > > +{
> > > > + ++sigio_count;
> > > > +}
> > > > +
> > > > +static void do_child(void)
> > > > +{
> > > > + for (int i = 0; i < 20; ++i)
> > > > + sleep(1);
> > > > +
> > > > + exit(0);
> > > > +}
> > > > +
> > > > +static int test__watermark_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
> > > > +{
> > > > + struct perf_event_attr attr;
> > > > + struct perf_event_mmap_page *p = NULL;
> > > > + sighandler_t previous_sigio = SIG_ERR;
> > > > + pid_t child = -1;
> > > > + int fd = -1;
> > > > + int ret = TEST_FAIL;
> > > > +
> > > > + previous_sigio = signal(SIGIO, handle_sigio);
> > > > + if (previous_sigio == SIG_ERR) {
> > > > + pr_debug("failed setting SIGIO handler\n");
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + memset(&attr, 0, sizeof(attr));
> > > > + attr.size = sizeof(attr);
> > > > + attr.type = PERF_TYPE_SOFTWARE;
> > > > + attr.config = PERF_COUNT_SW_DUMMY;
> > > > + attr.sample_period = 1;
> > > > + attr.disabled = 1;
> > > > + attr.watermark = 1;
> > > > + attr.context_switch = 1;
> > > > + attr.wakeup_watermark = 1;
> > > > +
> > > > + child = fork();
> > > > + if (child == 0)
> > > > + do_child();
> > > > + else if (child < 0) {
> > > > + pr_debug("failed fork() %d\n", errno);
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + fd = sys_perf_event_open(&attr, child, -1, -1,
> > > > + perf_event_open_cloexec_flag());
> > > > + if (fd < 0) {
> > > > + pr_debug("failed opening event %llx\n", attr.config);
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > > > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > > > + goto cleanup;
> > > > + }
> > >
> > > Thanks for the work! The perf tool and perf test should run on older
> > > kernels ideally without failure. I'm assuming this would fail on an
> > > older kernel. Could we make the return value skip in that case?
> >
> > Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
> > yes. It's not possible to distinguish between an older kernel and a
> > kernel where this fix broke (at least not without hardcoding in an
> > expected good kernel version, which seems undesirable), so that would
> > mean the test would always return ok or skip, not ok or fail. Is that
> > ok?
>
> Not great :-) Is there any hint from the errno? I was hoping for
> EOPNOTSUPP but I'm guessing it will be EINVAL, in which case probably
> something like:
> /* Older kernels lack support. */
> ret = (errno == EINVAL) ? TEST_SKIP : TEST_FAIL;
Ah, I see. The issue I am trying to fix is not that setting FASYNC is
rejected with an error code, it's that it doesn't actually send the IO
signal when the watermark is reached.
- Kyle
> > > > +
> > > > + if (fcntl(fd, F_SETOWN, getpid())) {
> > > > + pr_debug("failed F_SETOWN getpid() %d\n", errno);
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + if (fcntl(fd, F_SETSIG, SIGIO)) {
> > > > + pr_debug("failed F_SETSIG SIGIO %d\n", errno);
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> > > > + if (p == NULL) {
> > > > + pr_debug("failed to mmap\n");
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
> > > > + pr_debug("failed PERF_EVENT_IOC_ENABLE %d\n", errno);
> > > > + goto cleanup;
> > > > + }
> > > > +
> > > > + sleep(30);
> > >
> > > This is kind of a painful wait, could it be less? No sleeps would be best :-)
> >
> > We could synchronize with the child using SIGSTOP instead of sleeping
> > here. Not sure about getting rid of the tiny sleeps in the child. I
> > have observed that sched_yield() doesn't reliably trigger context
> > switches (which isn't surprising). I'll experiment with blocking on a
> > pipe or something.
>
> Great, thanks!
> Ian
>
> > - Kyle
> >
> > > Thanks,
> > > Ian
> > >
> > > > +
> > > > + ret = (sigio_count == 1) ? TEST_OK : TEST_FAIL;
> > > > +
> > > > +cleanup:
> > > > + if (p != NULL)
> > > > + munmap(p, 2 * page_size);
> > > > +
> > > > + if (fd >= 0)
> > > > + close(fd);
> > > > +
> > > > + if (child > 0) {
> > > > + kill(child, SIGTERM);
> > > > + waitpid(child, NULL, 0);
> > > > + }
> > > > +
> > > > + if (previous_sigio != SIG_ERR)
> > > > + signal(SIGIO, previous_sigio);
> > > > +
> > > > + return ret;
> > > > +}
> > > > +
> > > > +DEFINE_SUITE("Watermark signal handler", watermark_signal);
> > > > --
> > > > 2.34.1
> > > >
On Thu, Feb 22, 2024 at 10:41 AM Kyle Huey <[email protected]> wrote:
>
> On Thu, Feb 22, 2024 at 10:33 AM Ian Rogers <[email protected]> wrote:
> >
> > On Thu, Feb 22, 2024 at 9:55 AM Kyle Huey <[email protected]> wrote:
> > >
> > > On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
> > > >
> > > > On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> > > > >
> > > > > The test uses PERF_RECORD_SWITCH records to fill the ring buffer and
> > > > > trigger the watermark wakeup, which in turn should trigger an IO
> > > > > signal.
> > > > >
> > > > > Signed-off-by: Kyle Huey <[email protected]>
> > > > > ---
> > > > > tools/perf/tests/Build | 1 +
> > > > > tools/perf/tests/builtin-test.c | 1 +
> > > > > tools/perf/tests/tests.h | 1 +
> > > > > tools/perf/tests/watermark_signal.c | 123 ++++++++++++++++++++++++++++
> > > > > 4 files changed, 126 insertions(+)
> > > > > create mode 100644 tools/perf/tests/watermark_signal.c
> > > > >
> > > > > diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> > > > > index 53ba9c3e20e0..de43eb60b280 100644
> > > > > --- a/tools/perf/tests/Build
> > > > > +++ b/tools/perf/tests/Build
> > > > > @@ -67,6 +67,7 @@ perf-y += sigtrap.o
> > > > > perf-y += event_groups.o
> > > > > perf-y += symbols.o
> > > > > perf-y += util.o
> > > > > +perf-y += watermark_signal.o
> > > > >
> > > > > ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
> > > > > perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
> > > > > diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> > > > > index 4a5973f9bb9b..715c01a2172a 100644
> > > > > --- a/tools/perf/tests/builtin-test.c
> > > > > +++ b/tools/perf/tests/builtin-test.c
> > > > > @@ -124,6 +124,7 @@ static struct test_suite *generic_tests[] = {
> > > > > &suite__event_groups,
> > > > > &suite__symbols,
> > > > > &suite__util,
> > > > > + &suite__watermark_signal,
> > > > > NULL,
> > > > > };
> > > > >
> > > > > diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> > > > > index dad3d7414142..7ef4e0d0a77b 100644
> > > > > --- a/tools/perf/tests/tests.h
> > > > > +++ b/tools/perf/tests/tests.h
> > > > > @@ -146,6 +146,7 @@ DECLARE_SUITE(sigtrap);
> > > > > DECLARE_SUITE(event_groups);
> > > > > DECLARE_SUITE(symbols);
> > > > > DECLARE_SUITE(util);
> > > > > +DECLARE_SUITE(watermark_signal);
> > > > >
> > > > > /*
> > > > > * PowerPC and S390 do not support creation of instruction breakpoints using the
> > > > > diff --git a/tools/perf/tests/watermark_signal.c b/tools/perf/tests/watermark_signal.c
> > > > > new file mode 100644
> > > > > index 000000000000..ae4abedc4b7c
> > > > > --- /dev/null
> > > > > +++ b/tools/perf/tests/watermark_signal.c
> > > > > @@ -0,0 +1,123 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > +#include <stddef.h>
> > > > > +#include <signal.h>
> > > > > +#include <stdlib.h>
> > > > > +#include <string.h>
> > > > > +#include <sys/ioctl.h>
> > > > > +#include <sys/mman.h>
> > > > > +#include <sys/wait.h>
> > > > > +#include <unistd.h>
> > > > > +#include <errno.h>
> > > > > +#include <fcntl.h>
> > > > > +
> > > > > +#include "tests.h"
> > > > > +#include "debug.h"
> > > > > +#include "event.h"
> > > > > +#include "../perf-sys.h"
> > > > > +#include "cloexec.h"
> > > > > +#include <internal/lib.h> // page_size
> > > > > +
> > > > > +static int sigio_count;
> > > > > +
> > > > > +static void handle_sigio(int sig __always_unused)
> > > > > +{
> > > > > + ++sigio_count;
> > > > > +}
> > > > > +
> > > > > +static void do_child(void)
> > > > > +{
> > > > > + for (int i = 0; i < 20; ++i)
> > > > > + sleep(1);
> > > > > +
> > > > > + exit(0);
> > > > > +}
> > > > > +
> > > > > +static int test__watermark_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
> > > > > +{
> > > > > + struct perf_event_attr attr;
> > > > > + struct perf_event_mmap_page *p = NULL;
> > > > > + sighandler_t previous_sigio = SIG_ERR;
> > > > > + pid_t child = -1;
> > > > > + int fd = -1;
> > > > > + int ret = TEST_FAIL;
> > > > > +
> > > > > + previous_sigio = signal(SIGIO, handle_sigio);
> > > > > + if (previous_sigio == SIG_ERR) {
> > > > > + pr_debug("failed setting SIGIO handler\n");
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + memset(&attr, 0, sizeof(attr));
> > > > > + attr.size = sizeof(attr);
> > > > > + attr.type = PERF_TYPE_SOFTWARE;
> > > > > + attr.config = PERF_COUNT_SW_DUMMY;
> > > > > + attr.sample_period = 1;
> > > > > + attr.disabled = 1;
> > > > > + attr.watermark = 1;
> > > > > + attr.context_switch = 1;
> > > > > + attr.wakeup_watermark = 1;
> > > > > +
> > > > > + child = fork();
> > > > > + if (child == 0)
> > > > > + do_child();
> > > > > + else if (child < 0) {
> > > > > + pr_debug("failed fork() %d\n", errno);
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + fd = sys_perf_event_open(&attr, child, -1, -1,
> > > > > + perf_event_open_cloexec_flag());
> > > > > + if (fd < 0) {
> > > > > + pr_debug("failed opening event %llx\n", attr.config);
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > > > > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > > > > + goto cleanup;
> > > > > + }
> > > >
> > > > Thanks for the work! The perf tool and perf test should run on older
> > > > kernels ideally without failure. I'm assuming this would fail on an
> > > > older kernel. Could we make the return value skip in that case?
> > >
> > > Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
> > > yes. It's not possible to distinguish between an older kernel and a
> > > kernel where this fix broke (at least not without hardcoding in an
> > > expected good kernel version, which seems undesirable), so that would
> > > mean the test would always return ok or skip, not ok or fail. Is that
> > > ok?
> >
> > Not great :-) Is there any hint from the errno? I was hoping for
> > EOPNOTSUPP but I'm guessing it will be EINVAL, in which case probably
> > something like:
> > /* Older kernels lack support. */
> > ret = (errno == EINVAL) ? TEST_SKIP : TEST_FAIL;
>
> Ah, I see. The issue I am trying to fix is not that setting FASYNC is
> rejected with an error code, it's that it doesn't actually send the IO
> signal when the watermark is reached.
That's unfortunate. Perhaps similar to errno you could use uname,
return skip for failures on older kernels and fail on newer kernels.
Thanks,
Ian
> - Kyle
>
> > > > > +
> > > > > + if (fcntl(fd, F_SETOWN, getpid())) {
> > > > > + pr_debug("failed F_SETOWN getpid() %d\n", errno);
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + if (fcntl(fd, F_SETSIG, SIGIO)) {
> > > > > + pr_debug("failed F_SETSIG SIGIO %d\n", errno);
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> > > > > + if (p == NULL) {
> > > > > + pr_debug("failed to mmap\n");
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
> > > > > + pr_debug("failed PERF_EVENT_IOC_ENABLE %d\n", errno);
> > > > > + goto cleanup;
> > > > > + }
> > > > > +
> > > > > + sleep(30);
> > > >
> > > > This is kind of a painful wait, could it be less? No sleeps would be best :-)
> > >
> > > We could synchronize with the child using SIGSTOP instead of sleeping
> > > here. Not sure about getting rid of the tiny sleeps in the child. I
> > > have observed that sched_yield() doesn't reliably trigger context
> > > switches (which isn't surprising). I'll experiment with blocking on a
> > > pipe or something.
> >
> > Great, thanks!
> > Ian
> >
> > > - Kyle
> > >
> > > > Thanks,
> > > > Ian
> > > >
> > > > > +
> > > > > + ret = (sigio_count == 1) ? TEST_OK : TEST_FAIL;
> > > > > +
> > > > > +cleanup:
> > > > > + if (p != NULL)
> > > > > + munmap(p, 2 * page_size);
> > > > > +
> > > > > + if (fd >= 0)
> > > > > + close(fd);
> > > > > +
> > > > > + if (child > 0) {
> > > > > + kill(child, SIGTERM);
> > > > > + waitpid(child, NULL, 0);
> > > > > + }
> > > > > +
> > > > > + if (previous_sigio != SIG_ERR)
> > > > > + signal(SIGIO, previous_sigio);
> > > > > +
> > > > > + return ret;
> > > > > +}
> > > > > +
> > > > > +DEFINE_SUITE("Watermark signal handler", watermark_signal);
> > > > > --
> > > > > 2.34.1
> > > > >
On Thu, Feb 22, 2024 at 09:55:36AM -0800, Kyle Huey wrote:
> On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
> > On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> > > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > > + goto cleanup;
> > > + }
> > Thanks for the work! The perf tool and perf test should run on older
> > kernels ideally without failure. I'm assuming this would fail on an
> > older kernel. Could we make the return value skip in that case?
> Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
> yes. It's not possible to distinguish between an older kernel and a
> kernel where this fix broke (at least not without hardcoding in an
> expected good kernel version, which seems undesirable), so that would
Take a look at:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/perf/tests/sigtrap.c?id=650e0bde43f35bb675e87e30f679a57cfa22e0e5
To see how introspecting using BTF can be used to figure out if internal
data structures have what is needed, or if functions with some specific
arguments are present, etc, for sigtrap we have, in the patch above:
- TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+ expected_sigtraps = NUM_THREADS * ctx.iterate_on;
+
+ if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) {
+ pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n",
+ expected_sigtraps, ctx.signal_count);
+ pr_debug("See https://lore.kernel.org/all/[email protected]/\n");
+ return TEST_SKIP;
+ } else
+ TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps);
And then:
+static bool kernel_with_sleepable_spinlocks(void)
+{
+ const struct btf_member *member;
+ const struct btf_type *type;
+ const char *type_name;
+ int id;
+
+ if (!btf__available())
+ return false;
+
+ id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT);
+ if (id < 0)
+ return false;
+
+ // Only RT has a "lock" member for "struct spinlock"
+ member = __btf_type__find_member_by_name(id, "lock");
+ if (member == NULL)
+ return false;
+
+ // But check its type as well
+ type = btf__type_by_id(btf, member->type);
+ if (!type || !btf_is_struct(type))
+ return false;
+
+ type_name = btf__name_by_offset(btf, type->name_off);
+ return type_name && !strcmp(type_name, "rt_mutex_base");
+}
> mean the test would always return ok or skip, not ok or fail. Is that
> ok?
It should return:
Ok if the kernel has what is needed and the test passes
Skip if the test fails and the kernel doesn't have what is needed
FAIL if the test fails and the kernel HAS what is needed.
'perf test sigtrap' also checks for the presence of the feature it
requires:
static bool attr_has_sigtrap(void)
{
int id;
if (!btf__available()) {
/* should be an old kernel */
return false;
}
id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT);
if (id < 0)
return false;
return __btf_type__find_member_by_name(id, "sigtrap") != NULL;
}
fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
if (fd < 0) {
if (attr_has_sigtrap()) {
pr_debug("FAILED sys_perf_event_open(): %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
} else {
pr_debug("perf_event_attr doesn't have sigtrap\n");
ret = TEST_SKIP;
}
goto out_restore_sigaction;
}
- Arnaldo
On Thu, Feb 22, 2024 at 11:54 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> On Thu, Feb 22, 2024 at 09:55:36AM -0800, Kyle Huey wrote:
> > On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
> > > On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> > > > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > > > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > > > + goto cleanup;
> > > > + }
>
> > > Thanks for the work! The perf tool and perf test should run on older
> > > kernels ideally without failure. I'm assuming this would fail on an
> > > older kernel. Could we make the return value skip in that case?
>
> > Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
> > yes. It's not possible to distinguish between an older kernel and a
> > kernel where this fix broke (at least not without hardcoding in an
> > expected good kernel version, which seems undesirable), so that would
>
> Take a look at:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/perf/tests/sigtrap.c?id=650e0bde43f35bb675e87e30f679a57cfa22e0e5
>
> To see how introspecting using BTF can be used to figure out if internal
> data structures have what is needed, or if functions with some specific
> arguments are present, etc, for sigtrap we have, in the patch above:
>
> - TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
> + expected_sigtraps = NUM_THREADS * ctx.iterate_on;
> +
> + if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) {
> + pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n",
> + expected_sigtraps, ctx.signal_count);
> + pr_debug("See https://lore.kernel.org/all/[email protected]/\n");
> + return TEST_SKIP;
> + } else
> + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps);
>
> And then:
>
> +static bool kernel_with_sleepable_spinlocks(void)
> +{
> + const struct btf_member *member;
> + const struct btf_type *type;
> + const char *type_name;
> + int id;
> +
> + if (!btf__available())
> + return false;
> +
> + id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT);
> + if (id < 0)
> + return false;
> +
> + // Only RT has a "lock" member for "struct spinlock"
> + member = __btf_type__find_member_by_name(id, "lock");
> + if (member == NULL)
> + return false;
> +
> + // But check its type as well
> + type = btf__type_by_id(btf, member->type);
> + if (!type || !btf_is_struct(type))
> + return false;
> +
> + type_name = btf__name_by_offset(btf, type->name_off);
> + return type_name && !strcmp(type_name, "rt_mutex_base");
> +}
>
> > mean the test would always return ok or skip, not ok or fail. Is that
> > ok?
>
> It should return:
>
> Ok if the kernel has what is needed and the test passes
> Skip if the test fails and the kernel doesn't have what is needed
> FAIL if the test fails and the kernel HAS what is needed.
>
> 'perf test sigtrap' also checks for the presence of the feature it
> requires:
>
> static bool attr_has_sigtrap(void)
> {
> int id;
>
> if (!btf__available()) {
> /* should be an old kernel */
> return false;
> }
>
> id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT);
> if (id < 0)
> return false;
>
> return __btf_type__find_member_by_name(id, "sigtrap") != NULL;
> }
>
> fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
> if (fd < 0) {
> if (attr_has_sigtrap()) {
> pr_debug("FAILED sys_perf_event_open(): %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> } else {
> pr_debug("perf_event_attr doesn't have sigtrap\n");
> ret = TEST_SKIP;
> }
> goto out_restore_sigaction;
> }
>
> - Arnaldo
I think perhaps I'm barking up the wrong tree here. This seems like a
ton of work just to write a regression test. Maybe I should be doing
this in tools/testing/selftests instead?
- Kyle
On Fri, Feb 23, 2024 at 9:35 AM Kyle Huey <[email protected]> wrote:
>
> On Thu, Feb 22, 2024 at 11:54 AM Arnaldo Carvalho de Melo
> <[email protected]> wrote:
> >
> > On Thu, Feb 22, 2024 at 09:55:36AM -0800, Kyle Huey wrote:
> > > On Wed, Feb 21, 2024 at 10:36 AM Ian Rogers <[email protected]> wrote:
> > > > On Wed, Feb 21, 2024 at 9:52 AM Kyle Huey <[email protected]> wrote:
> > > > > + if (fcntl(fd, F_SETFL, FASYNC)) {
> > > > > + pr_debug("failed F_SETFL FASYNC %d\n", errno);
> > > > > + goto cleanup;
> > > > > + }
> >
> > > > Thanks for the work! The perf tool and perf test should run on older
> > > > kernels ideally without failure. I'm assuming this would fail on an
> > > > older kernel. Could we make the return value skip in that case?
> >
> > > Ah, hmm, I wasn't aware of that. This would fail on an older kernel,
> > > yes. It's not possible to distinguish between an older kernel and a
> > > kernel where this fix broke (at least not without hardcoding in an
> > > expected good kernel version, which seems undesirable), so that would
> >
> > Take a look at:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/perf/tests/sigtrap.c?id=650e0bde43f35bb675e87e30f679a57cfa22e0e5
> >
> > To see how introspecting using BTF can be used to figure out if internal
> > data structures have what is needed, or if functions with some specific
> > arguments are present, etc, for sigtrap we have, in the patch above:
> >
> > - TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
> > + expected_sigtraps = NUM_THREADS * ctx.iterate_on;
> > +
> > + if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) {
> > + pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n",
> > + expected_sigtraps, ctx.signal_count);
> > + pr_debug("See https://lore.kernel.org/all/[email protected]/\n");
> > + return TEST_SKIP;
> > + } else
> > + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps);
> >
> > And then:
> >
> > +static bool kernel_with_sleepable_spinlocks(void)
> > +{
> > + const struct btf_member *member;
> > + const struct btf_type *type;
> > + const char *type_name;
> > + int id;
> > +
> > + if (!btf__available())
> > + return false;
> > +
> > + id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT);
> > + if (id < 0)
> > + return false;
> > +
> > + // Only RT has a "lock" member for "struct spinlock"
> > + member = __btf_type__find_member_by_name(id, "lock");
> > + if (member == NULL)
> > + return false;
> > +
> > + // But check its type as well
> > + type = btf__type_by_id(btf, member->type);
> > + if (!type || !btf_is_struct(type))
> > + return false;
> > +
> > + type_name = btf__name_by_offset(btf, type->name_off);
> > + return type_name && !strcmp(type_name, "rt_mutex_base");
> > +}
> >
> > > mean the test would always return ok or skip, not ok or fail. Is that
> > > ok?
> >
> > It should return:
> >
> > Ok if the kernel has what is needed and the test passes
> > Skip if the test fails and the kernel doesn't have what is needed
> > FAIL if the test fails and the kernel HAS what is needed.
> >
> > 'perf test sigtrap' also checks for the presence of the feature it
> > requires:
> >
> > static bool attr_has_sigtrap(void)
> > {
> > int id;
> >
> > if (!btf__available()) {
> > /* should be an old kernel */
> > return false;
> > }
> >
> > id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT);
> > if (id < 0)
> > return false;
> >
> > return __btf_type__find_member_by_name(id, "sigtrap") != NULL;
> > }
> >
> > fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
> > if (fd < 0) {
> > if (attr_has_sigtrap()) {
> > pr_debug("FAILED sys_perf_event_open(): %s\n",
> > str_error_r(errno, sbuf, sizeof(sbuf)));
> > } else {
> > pr_debug("perf_event_attr doesn't have sigtrap\n");
> > ret = TEST_SKIP;
> > }
> > goto out_restore_sigaction;
> > }
> >
> > - Arnaldo
>
> I think perhaps I'm barking up the wrong tree here. This seems like a
> ton of work just to write a regression test. Maybe I should be doing
> this in tools/testing/selftests instead?
The problem is detecting support for the feature in the kernel. The
BTF approach isn't that bad, a couple of finds, but I think in this
case there isn't anything to be found to indicate the feature is
present. I like the perf test as perf tests are a form of
documentation. Perhaps just using TEST_SKIP here (rather than
TEST_FAIL) is best and the skip_reason can be a presumed lack of
kernel support.
Thanks,
Ian
> - Kyle
On Fri, Feb 23, 2024 at 09:35:29AM -0800, Kyle Huey wrote:
> On Thu, Feb 22, 2024 at 11:54 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
> > if (fd < 0) {
> > if (attr_has_sigtrap()) {
> > pr_debug("FAILED sys_perf_event_open(): %s\n",
> > str_error_r(errno, sbuf, sizeof(sbuf)));
> > } else {
> > pr_debug("perf_event_attr doesn't have sigtrap\n");
> > ret = TEST_SKIP;
> > }
> > goto out_restore_sigaction;
> I think perhaps I'm barking up the wrong tree here. This seems like a
> ton of work just to write a regression test. Maybe I should be doing
> this in tools/testing/selftests instead?
Well, if it tests a perf feature, then yeah, that would be better placed
in 'perf test'.
Maybe you can just assume this will run just on modern kernels where
this feature is expected to be present and then add some pr_debug()
stating that maybe this feature is not present in the kernel.
For the vast majority of cases this will be enough, so that is what I
encourage you to do now.
Its possible that distros, like Red Hat, end up backporting first the
perf tools with this test and not the feature it is being tested, if
that happens, then it will eventually come to my attention and I will be
able to do the BTF treatment, as in that case even a test based on the
kernel version would be insufficient.
- Arnaldo
On Fri, Feb 23, 2024 at 10:01:31AM -0800, Ian Rogers wrote:
> On Fri, Feb 23, 2024 at 9:35 AM Kyle Huey <[email protected]> wrote:
> > I think perhaps I'm barking up the wrong tree here. This seems like a
> > ton of work just to write a regression test. Maybe I should be doing
> > this in tools/testing/selftests instead?
> The problem is detecting support for the feature in the kernel. The
> BTF approach isn't that bad, a couple of finds, but I think in this
> case there isn't anything to be found to indicate the feature is
> present. I like the perf test as perf tests are a form of
> documentation. Perhaps just using TEST_SKIP here (rather than
> TEST_FAIL) is best and the skip_reason can be a presumed lack of
> kernel support.
But going forward the general expectation is that it should pass as
the feature _is_ present, isn't it?
- Arnaldo
(I work with Kyle.)
IMHO this is more of a bug fix than a feature. `man perf_event_open`
expects this to work already: "watermark: If set, have an overflow
notification happen when we cross the wakeup_watermark boundary" and
later "Alternatively, the overflow events can be captured via a signal
handler, by enabling I/O signaling".
Bug fixes need regression tests. Such tests should fail on any kernel
where the bug is present. It seems strange to expect each such test to
detect whether the bug "should be fixed" in the kernel it's running on
and skip when that's not the case. I haven't seen any other project
try to do this. Instead (as in kernel selftests) the tests, the code
under test, and any metadata about which tests are expected to pass
are all in the repository together and updated together.
It makes sense that tests for the code in tools/perf should not fail
on older kernels, given that the code in tools/perf is expected to
work on older kernels. But tests for bug fixes in the kernel itself
should be expected to fail on older kernels and therefore should live
somewhere else, IMHO.
Rob
--
Su ot deraeppa sah dna Rehtaf eht htiw saw hcihw, efil lanrete eht uoy
ot mialcorp ew dna, ti ot yfitset dna ti nees evah ew; deraeppa efil
eht. Efil fo Drow eht gninrecnoc mialcorp ew siht - dehcuot evah sdnah
ruo dna ta dekool evah ew hcihw, seye ruo htiw nees evah ew hcihw,
draeh evah ew hcihw, gninnigeb eht morf saw hcihw taht.
Hello,
On Fri, Feb 23, 2024 at 1:44 PM Robert O'Callahan <robert@ocallahanorg> wrote:
>
> (I work with Kyle.)
>
> IMHO this is more of a bug fix than a feature. `man perf_event_open`
> expects this to work already: "watermark: If set, have an overflow
> notification happen when we cross the wakeup_watermark boundary" and
> later "Alternatively, the overflow events can be captured via a signal
> handler, by enabling I/O signaling".
>
> Bug fixes need regression tests. Such tests should fail on any kernel
> where the bug is present. It seems strange to expect each such test to
> detect whether the bug "should be fixed" in the kernel it's running on
> and skip when that's not the case. I haven't seen any other project
> try to do this. Instead (as in kernel selftests) the tests, the code
> under test, and any metadata about which tests are expected to pass
> are all in the repository together and updated together.
>
> It makes sense that tests for the code in tools/perf should not fail
> on older kernels, given that the code in tools/perf is expected to
> work on older kernels. But tests for bug fixes in the kernel itself
> should be expected to fail on older kernels and therefore should live
> somewhere else, IMHO.
I think it makes sense to put the test in the selftests and to be
deployed with the kernel. AFAICS it doesn't have anything
specific to the perf tools and tests the general kernel behavior.
Thanks,
Namhyung
On Sat, Feb 24, 2024 at 10:43:38AM +1300, Robert O'Callahan wrote:
> (I work with Kyle.)
>
> IMHO this is more of a bug fix than a feature. `man perf_event_open`
> expects this to work already: "watermark: If set, have an overflow
> notification happen when we cross the wakeup_watermark boundary" and
> later "Alternatively, the overflow events can be captured via a signal
> handler, by enabling I/O signaling".
>
> Bug fixes need regression tests. Such tests should fail on any kernel
> where the bug is present. It seems strange to expect each such test to
> detect whether the bug "should be fixed" in the kernel it's running on
> and skip when that's not the case. I haven't seen any other project
> try to do this. Instead (as in kernel selftests) the tests, the code
> under test, and any metadata about which tests are expected to pass
> are all in the repository together and updated together.
>
> It makes sense that tests for the code in tools/perf should not fail
> on older kernels, given that the code in tools/perf is expected to
> work on older kernels. But tests for bug fixes in the kernel itself
> should be expected to fail on older kernels and therefore should live
> somewhere else, IMHO.
That is fine, I was mostly trying to help in having the 'perf test'
patch posted to address the review comments from Ian.
And I'm biased because I work on a company that does lots of backports
and then test perf functionality using 'perf test'.
And also because the kernel now has this BTF information that can be
used for introspection.
Being able to run the installed 'perf' tool and check if some fix is in
place and its regression test is passing looked like an improvement
when compared to the selftests method.
If in the end people decide to do this in selftests, that is ok with me.
- Arnaldo