The tracing ring-buffers can be stored on disk or sent to the network without
any copy, via splice. However, the latter does not allow real-time processing
of the traces. A solution to that problem is to give userspace direct access to
the ring-buffer pages via a memory mapping. A piece of software can then become
a reader of the ring-buffer and drive a consuming or non-consuming read, in a
similar fashion to what trace and trace_pipe offer.

Attached to this cover letter is an example of a consuming read for a
ring-buffer, using libtracefs.
Vincent
v3 -> v4:
* Add to the meta-page:
- pages_lost / pages_read (allow computing how full the ring-buffer is; see the sketch after this changelog entry)
- read (allows computing how many entries can be read)
- A reader_page struct.
* Rename ring_buffer_meta_header -> ring_buffer_meta
* Rename ring_buffer_get_reader_page -> ring_buffer_map_get_reader_page
* Properly consume events on ring_buffer_map_get_reader_page() with
rb_advance_reader().
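
For clarity, a rough sketch (not part of the series) of how those meta-page
fields can be combined. It assumes the counters keep the kernel-side semantics,
i.e. pages_touched - pages_lost - pages_read mirrors what
ring_buffer_nr_dirty_pages() computes, and the entries computation matches
number_entries() in the example attached below; dirty_pages() and
readable_entries() are purely illustrative names:

/* struct ring_buffer_meta as in include/uapi/linux/trace_mmap.h */

/* Approximate number of data pages holding unread events */
static unsigned long dirty_pages(struct ring_buffer_meta *meta)
{
	return meta->pages_touched - meta->pages_lost - meta->pages_read;
}

/* Number of entries still available for reading */
static unsigned long readable_entries(struct ring_buffer_meta *meta)
{
	return meta->entries - (meta->overrun + meta->read);
}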
v2 -> v3:
* Remove data page list (for non-consuming read)
** Implies removing order > 0 meta-page
* Add a new meta page field ->read
* Rename ring_buffer_meta_page_header into ring_buffer_meta_header
v1 -> v2:
* Hide data_pages from the userspace struct
* Fix META_PAGE_MAX_PAGES
* Support for order > 0 meta-page
* Add missing page->mapping.
Vincent Donnefort (2):
ring-buffer: Introducing ring-buffer mapping functions
tracing: Allow user-space mapping of the ring-buffer
include/linux/ring_buffer.h | 7 +
include/uapi/linux/trace_mmap.h | 28 +++
kernel/trace/ring_buffer.c | 322 +++++++++++++++++++++++++++++++-
kernel/trace/trace.c | 72 ++++++-
4 files changed, 422 insertions(+), 7 deletions(-)
create mode 100644 include/uapi/linux/trace_mmap.h
--
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <signal.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

#include <tracefs.h>
#include <kbuffer.h>
#include <event-parse.h>

#include <asm/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#define TRACE_MMAP_IOCTL_GET_READER_PAGE _IO('T', 0x1)
struct ring_buffer_meta {
	unsigned long	entries;
	unsigned long	overrun;
	unsigned long	read;

	unsigned long	pages_touched;
	unsigned long	pages_lost;
	unsigned long	pages_read;

	__u32		meta_page_size;
	__u32		nr_data_pages;	/* Number of pages in the ring-buffer */

	struct reader_page {
		__u32		id;		/* Reader page ID from 0 to nr_data_pages - 1 */
		__u32		read;		/* Number of bytes read on the reader page */
		unsigned long	lost_events;	/* Events lost at the time of the reader swap */
	} reader_page;
};
static char *argv0;
static bool exit_requested;
static char *get_this_name(void)
{
	static char *this_name;
	char *arg;
	char *p;

	if (this_name)
		return this_name;

	arg = argv0;
	p = arg + strlen(arg);

	while (p >= arg && *p != '/')
		p--;
	p++;

	this_name = p;
	return p;
}

static void __vdie(const char *fmt, va_list ap, int err)
{
	int ret = errno;
	char *p = get_this_name();

	if (err && errno)
		perror(p);
	else
		ret = -1;

	fprintf(stderr, " ");
	vfprintf(stderr, fmt, ap);
	fprintf(stderr, "\n");

	exit(ret);
}

void pdie(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	__vdie(fmt, ap, 1);
	va_end(ap);
}
#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))

static unsigned long number_entries(struct ring_buffer_meta *meta)
{
	return READ_ONCE(meta->entries) - (READ_ONCE(meta->overrun) +
					   READ_ONCE(meta->read));
}
static void read_page(struct tep_handle *tep, struct kbuffer *kbuf)
{
	static struct trace_seq seq;
	struct tep_record record;

	if (seq.buffer)
		trace_seq_reset(&seq);
	else
		trace_seq_init(&seq);

	while ((record.data = kbuffer_read_event(kbuf, &record.ts))) {
		kbuffer_next_event(kbuf, NULL);
		tep_print_event(tep, &seq, &record,
				"%s-%d %9d\t%s\n", TEP_PRINT_COMM,
				TEP_PRINT_PID, TEP_PRINT_TIME, TEP_PRINT_NAME);
		trace_seq_do_printf(&seq);
		trace_seq_reset(&seq);
	}
}
static int next_reader_page(int fd, struct ring_buffer_meta *meta, unsigned long *read)
{
	__u32 prev_reader, new_reader;
	unsigned long prev_read;

	prev_read = READ_ONCE(meta->reader_page.read);
	prev_reader = READ_ONCE(meta->reader_page.id);

	/* Ask the kernel to swap the reader page with the head page */
	if (ioctl(fd, TRACE_MMAP_IOCTL_GET_READER_PAGE) < 0)
		pdie("ioctl");

	new_reader = READ_ONCE(meta->reader_page.id);

	/* Same reader page as before: resume after what was already read */
	if (prev_reader != new_reader)
		*read = 0;
	else
		*read = prev_read;

	return new_reader;
}

static void signal_handler(int unused)
{
	printf("Exit!\n");
	exit_requested = true;
}
int main(int argc, char **argv)
{
	int page_size, meta_len, data_len, page, fd;
	struct ring_buffer_meta *map;
	struct tep_handle *tep;
	struct kbuffer *kbuf;
	unsigned long read;
	void *meta, *data;
	char path[32];
	int cpu;

	if (argc != 2)
		return -EINVAL;

	argv0 = argv[0];
	cpu = atoi(argv[1]);
	snprintf(path, 32, "per_cpu/cpu%d/trace_pipe_raw", cpu);

	tep = tracefs_local_events(NULL);
	kbuf = tep_kbuffer(tep);
	page_size = getpagesize();

	fd = tracefs_instance_file_open(NULL, path, O_RDONLY);
	if (fd < 0)
		pdie("raw");

	/* The first page of the mapping is the meta-page */
	meta = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
	if (meta == MAP_FAILED)
		pdie("mmap");
	map = (struct ring_buffer_meta *)meta;
	meta_len = map->meta_page_size;

	printf("entries: %lu\n", map->entries);
	printf("overrun: %lu\n", map->overrun);
	printf("read: %lu\n", map->read);
	printf("pages_touched: %lu\n", map->pages_touched);
	printf("pages_lost: %lu\n", map->pages_lost);
	printf("pages_read: %lu\n", map->pages_read);

	/* The data pages follow the meta-page, ordered by their IDs */
	data_len = page_size * map->nr_data_pages;
	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, fd, meta_len);
	if (data == MAP_FAILED)
		pdie("mmap data");

	signal(SIGINT, signal_handler);

	while (!exit_requested) {
		if (!number_entries(map)) {
			usleep(100000);
			continue;
		}

		/* Swap the reader page and translate its ID into a mapping offset */
		page = next_reader_page(fd, map, &read);
		kbuffer_load_subbuffer(kbuf, data + page_size * page);

		/* Skip events already consumed on a previous iteration */
		while (kbuf->curr < read)
			kbuffer_next_event(kbuf, NULL);

		read_page(tep, kbuf);
	}

	munmap(data, data_len);
	munmap(meta, page_size);
	close(fd);

	return 0;
}
--
2.41.0.162.gfafddb0af9-goog
Currently, user-space extracts data from the ring-buffer via splice,
which is handy for storage or network sharing. However, due to splice
limitations, it is impossible to do real-time analysis without a copy.

A solution to that problem is to let user-space map the ring-buffer
directly.

The mapping is exposed via the per-CPU file trace_pipe_raw. The first
page is the meta-page and is followed by each page of the ring-buffer,
ordered by their unique page ID. It is therefore easy to translate a
page ID into an offset in the mapping.
* Meta-page -- include/uapi/linux/trace_mmap.h for a description
* Page ID 0
* Page ID 1
...
The mapper must then do what used to be the kernel's job: swap the
reader page with the head page. This is done with a newly introduced
ioctl: TRACE_MMAP_IOCTL_GET_READER_PAGE.
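
For illustration only (not part of this patch), a minimal sketch of how a
mapper can use this layout together with the new ioctl. It assumes the UAPI
header is installed as <linux/trace_mmap.h>, uses the default tracefs mount
point, and skips all error handling; the complete libtracefs-based example is
attached to the cover letter:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/trace_mmap.h> /* struct ring_buffer_meta, TRACE_MMAP_IOCTL_GET_READER_PAGE */

int main(void)
{
	int page_size = getpagesize();
	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
	struct ring_buffer_meta *meta;
	void *data, *reader;

	/* pgoff 0 is the meta-page */
	meta = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);

	/* The data pages follow, ordered by their unique page ID */
	data = mmap(NULL, (size_t)meta->nr_data_pages * page_size, PROT_READ,
		    MAP_SHARED, fd, meta->meta_page_size);

	/* Swap the reader page with the head page... */
	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER_PAGE);

	/* ...and translate its page ID into an offset in the mapping */
	reader = data + (size_t)meta->reader_page.id * page_size;
	printf("reader page at %p, %u bytes already read\n",
	       reader, meta->reader_page.read);

	munmap(data, (size_t)meta->nr_data_pages * page_size);
	munmap(meta, page_size);
	close(fd);

	return 0;
}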
Signed-off-by: Vincent Donnefort <[email protected]>
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index 653176cc50bc..fd323a92cf78 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -23,4 +23,6 @@ struct ring_buffer_meta {
} reader_page;
};
+#define TRACE_MMAP_IOCTL_GET_READER_PAGE _IO('T', 0x1)
+
#endif /* _UAPI_TRACE_MMAP_H_ */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 64a4dde073ef..ccc0e1dd7d0d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6432,7 +6432,7 @@ static void tracing_set_nop(struct trace_array *tr)
{
if (tr->current_trace == &nop_trace)
return;
-
+
tr->current_trace->enabled--;
if (tr->current_trace->reset)
@@ -8465,15 +8465,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
return ret;
}
-/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct ftrace_buffer_info *info = file->private_data;
struct trace_iterator *iter = &info->iter;
- if (cmd)
- return -ENOIOCTLCMD;
+ if (cmd == TRACE_MMAP_IOCTL_GET_READER_PAGE)
+ return ring_buffer_map_get_reader_page(iter->array_buffer->buffer,
+ iter->cpu_file);
+ else if (cmd)
+ return -ENOTTY;
+ /*
+ * An ioctl call with cmd 0 to the ring buffer file will wake up all
+ * waiters
+ */
mutex_lock(&trace_types_lock);
iter->wait_index++;
@@ -8486,6 +8492,63 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
return 0;
}
+static vm_fault_t tracing_buffers_mmap_fault(struct vm_fault *vmf)
+{
+ struct ftrace_buffer_info *info = vmf->vma->vm_file->private_data;
+ struct trace_iterator *iter = &info->iter;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
+ struct page *page;
+
+ page = ring_buffer_map_fault(iter->array_buffer->buffer, iter->cpu_file,
+ vmf->pgoff);
+ if (!page)
+ return ret;
+
+ vmf->page = page;
+
+ get_page(vmf->page);
+ vmf->page->mapping = vmf->vma->vm_file->f_mapping;
+ vmf->page->index = vmf->pgoff;
+
+ return 0;
+}
+
+static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
+{
+ struct ftrace_buffer_info *info = vma->vm_file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file);
+}
+
+static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
+{
+ struct ftrace_buffer_info *info = vma->vm_file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ WARN_ON(ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file));
+}
+
+static const struct vm_operations_struct tracing_buffers_vmops = {
+ .open = tracing_buffers_mmap_open,
+ .close = tracing_buffers_mmap_close,
+ .fault = tracing_buffers_mmap_fault,
+};
+
+static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct ftrace_buffer_info *info = filp->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
+
+ vm_flags_mod(vma, VM_DONTCOPY | VM_DONTDUMP, VM_MAYWRITE);
+ vma->vm_ops = &tracing_buffers_vmops;
+
+ return ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file);
+}
+
static const struct file_operations tracing_buffers_fops = {
.open = tracing_buffers_open,
.read = tracing_buffers_read,
@@ -8494,6 +8557,7 @@ static const struct file_operations tracing_buffers_fops = {
.splice_read = tracing_buffers_splice_read,
.unlocked_ioctl = tracing_buffers_ioctl,
.llseek = no_llseek,
+ .mmap = tracing_buffers_mmap,
};
static ssize_t
--
2.41.0.162.gfafddb0af9-goog
In preparation for allowing user-space to map a ring-buffer, add
a set of mapping functions:
ring_buffer_{map,unmap}()
ring_buffer_map_fault()
And controls on the ring-buffer:
ring_buffer_map_get_reader_page() /* swap reader and head */
Mapping the ring-buffer also involves:
A unique ID for each page of the ring-buffer, as currently the pages
are only identified through their in-kernel VA.

A meta-page, where statistics about the ring-buffer are stored, along
with an ordered list of page IDs. One field gives which page is the
reader one, and another gives where the ring-buffer starts in the list
of data pages.
The linear mapping exposes the meta-page, followed by each page of the
ring-buffer, ordered by their unique ID, which is assigned during the
first mapping.

Once mapped, no page can get in or out of the ring-buffer: the buffer
size will remain unmodified and the splice-enabling functions will in
reality simply memcpy the data instead of swapping pages.
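
As an illustration (not part of this patch), a rough sketch of how a file
exposing a ring-buffer is expected to plug these functions into its mmap
path; it follows the shape of the trace_pipe_raw wiring in the companion
patch of this series, with my_buffer/my_cpu being placeholders for the
trace_buffer and CPU owned by that file:

#include <linux/mm.h>
#include <linux/ring_buffer.h>

static struct trace_buffer *my_buffer;	/* placeholder */
static int my_cpu;			/* placeholder */

static vm_fault_t my_mmap_fault(struct vm_fault *vmf)
{
	/* pgoff 0 is the meta-page, then the data pages ordered by ID */
	struct page *page = ring_buffer_map_fault(my_buffer, my_cpu, vmf->pgoff);

	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static void my_mmap_close(struct vm_area_struct *vma)
{
	/* Drop a mapping reference; the last one frees meta-page and page IDs */
	ring_buffer_unmap(my_buffer, my_cpu);
}

static const struct vm_operations_struct my_vmops = {
	.close	= my_mmap_close,
	.fault	= my_mmap_fault,
};

static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &my_vmops;

	/* Assign the page IDs, allocate the meta-page and pin the buffer size */
	return ring_buffer_map(my_buffer, my_cpu);
}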
Signed-off-by: Vincent Donnefort <[email protected]>
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 782e14f62201..980abfbd92ed 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -6,6 +6,8 @@
#include <linux/seq_file.h>
#include <linux/poll.h>
+#include <uapi/linux/trace_mmap.h>
+
struct trace_buffer;
struct ring_buffer_iter;
@@ -211,4 +213,9 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);
#define trace_rb_cpu_prepare NULL
#endif
+int ring_buffer_map(struct trace_buffer *buffer, int cpu);
+int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
+struct page *ring_buffer_map_fault(struct trace_buffer *buffer, int cpu,
+ unsigned long pgoff);
+int ring_buffer_map_get_reader_page(struct trace_buffer *buffer, int cpu);
#endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
new file mode 100644
index 000000000000..653176cc50bc
--- /dev/null
+++ b/include/uapi/linux/trace_mmap.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_TRACE_MMAP_H_
+#define _UAPI_TRACE_MMAP_H_
+
+#include <linux/types.h>
+
+struct ring_buffer_meta {
+ unsigned long entries;
+ unsigned long overrun;
+ unsigned long read;
+
+ unsigned long pages_touched;
+ unsigned long pages_lost;
+ unsigned long pages_read;
+
+ __u32 meta_page_size;
+ __u32 nr_data_pages; /* Number of pages in the ring-buffer */
+
+ struct reader_page {
+ __u32 id; /* Reader page ID from 0 to nr_data_pages - 1 */
+ __u32 read; /* Number of bytes read on the reader page */
+ unsigned long lost_events; /* Events lost at the time of the reader swap */
+ } reader_page;
+};
+
+#endif /* _UAPI_TRACE_MMAP_H_ */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 834b361a4a66..0e8137161955 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -332,6 +332,7 @@ struct buffer_page {
local_t entries; /* entries on this page */
unsigned long real_end; /* real end of data */
struct buffer_data_page *page; /* Actual data page */
+ u32 id; /* ID for external mapping */
};
/*
@@ -523,6 +524,12 @@ struct ring_buffer_per_cpu {
rb_time_t before_stamp;
u64 event_stamp[MAX_NEST];
u64 read_stamp;
+
+ int mapped;
+ struct mutex mapping_lock;
+ unsigned long *page_ids; /* ID to addr */
+ struct ring_buffer_meta *meta_page;
+
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
struct list_head new_pages; /* new pages to add */
@@ -1561,6 +1568,13 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
/* Again, either we update tail_page or an interrupt does */
(void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
}
+
+ if (READ_ONCE(cpu_buffer->mapped)) {
+ /* Ensure the meta_page is ready */
+ smp_rmb();
+ WRITE_ONCE(cpu_buffer->meta_page->pages_touched,
+ local_read(&cpu_buffer->pages_touched));
+ }
}
static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1724,6 +1738,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
init_waitqueue_head(&cpu_buffer->irq_work.waiters);
init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
+ mutex_init(&cpu_buffer->mapping_lock);
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
@@ -2168,7 +2183,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
-
if (cpu_id == RING_BUFFER_ALL_CPUS) {
/*
* Don't succeed if resizing is disabled, as a reader might be
@@ -2518,6 +2532,15 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
+ if (READ_ONCE(cpu_buffer->mapped)) {
+ /* Ensure the meta_page is ready */
+ smp_rmb();
+ WRITE_ONCE(cpu_buffer->meta_page->overrun,
+ local_read(&cpu_buffer->overrun));
+ WRITE_ONCE(cpu_buffer->meta_page->pages_lost,
+ local_read(&cpu_buffer->pages_lost));
+ }
+
/*
* The entries will be zeroed out when we move the
* tail page.
@@ -3180,6 +3203,14 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
local_inc(&cpu_buffer->entries);
+
+ if (READ_ONCE(cpu_buffer->mapped)) {
+ /* Ensure the meta_page is ready */
+ smp_rmb();
+ WRITE_ONCE(cpu_buffer->meta_page->entries,
+ local_read(&cpu_buffer->entries));
+ }
+
rb_end_commit(cpu_buffer);
}
@@ -3483,7 +3514,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
return;
/*
- * If this interrupted another event,
+ * If this interrupted another event,
*/
if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
goto out;
@@ -4655,6 +4686,13 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->last_overrun = overwrite;
}
+ if (cpu_buffer->mapped) {
+ WRITE_ONCE(cpu_buffer->meta_page->reader_page.read, 0);
+ WRITE_ONCE(cpu_buffer->meta_page->reader_page.id, reader->id);
+ WRITE_ONCE(cpu_buffer->meta_page->reader_page.lost_events, cpu_buffer->lost_events);
+ WRITE_ONCE(cpu_buffer->meta_page->pages_read, local_read(&cpu_buffer->pages_read));
+ }
+
goto again;
out:
@@ -4721,6 +4759,13 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
length = rb_event_length(event);
cpu_buffer->reader_page->read += length;
+
+ if (cpu_buffer->mapped) {
+ WRITE_ONCE(cpu_buffer->meta_page->reader_page.read,
+ cpu_buffer->reader_page->read);
+ WRITE_ONCE(cpu_buffer->meta_page->read,
+ cpu_buffer->read);
+ }
}
static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -5242,6 +5287,19 @@ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_size);
+static void rb_reset_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct ring_buffer_meta *meta = cpu_buffer->meta_page;
+
+ WRITE_ONCE(meta->entries, 0);
+ WRITE_ONCE(meta->overrun, 0);
+ WRITE_ONCE(meta->read, cpu_buffer->read);
+ WRITE_ONCE(meta->pages_touched, 0);
+ WRITE_ONCE(meta->pages_lost, 0);
+ WRITE_ONCE(meta->pages_read, local_read(&cpu_buffer->pages_read));
+ WRITE_ONCE(meta->reader_page.read, cpu_buffer->reader_page->read);
+}
+
static void
rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
{
@@ -5288,6 +5346,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->lost_events = 0;
cpu_buffer->last_overrun = 0;
+ if (cpu_buffer->mapped)
+ rb_reset_meta_page(cpu_buffer);
+
rb_head_page_activate(cpu_buffer);
}
@@ -5502,6 +5563,11 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
cpu_buffer_a = buffer_a->buffers[cpu];
cpu_buffer_b = buffer_b->buffers[cpu];
+ if (READ_ONCE(cpu_buffer_a->mapped) || READ_ONCE(cpu_buffer_b->mapped)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/* At least make sure the two buffers are somewhat the same */
if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
goto out;
@@ -5735,7 +5801,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* Otherwise, we can simply swap the page with the one passed in.
*/
if (read || (len < (commit - read)) ||
- cpu_buffer->reader_page == cpu_buffer->commit_page) {
+ cpu_buffer->reader_page == cpu_buffer->commit_page ||
+ cpu_buffer->mapped) {
struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
unsigned int rpos = read;
unsigned int pos = 0;
@@ -5852,6 +5919,255 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_read_page);
+static void rb_free_page_ids(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ int i;
+
+ for (i = 0; i < cpu_buffer->nr_pages + 1; i++)
+ virt_to_page(cpu_buffer->page_ids[i])->mapping = NULL;
+
+ kfree(cpu_buffer->page_ids);
+ cpu_buffer->page_ids = NULL;
+}
+
+static int rb_alloc_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ if (cpu_buffer->meta_page)
+ return 0;
+
+ cpu_buffer->meta_page = page_to_virt(alloc_page(GFP_USER));
+ if (!cpu_buffer->meta_page)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void rb_free_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ unsigned long addr = (unsigned long)cpu_buffer->meta_page;
+
+ virt_to_page(addr)->mapping = NULL;
+ free_page(addr);
+ cpu_buffer->meta_page = NULL;
+}
+
+static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned long *page_ids)
+{
+ struct ring_buffer_meta *meta = cpu_buffer->meta_page;
+ unsigned int nr_data_pages = cpu_buffer->nr_pages + 1;
+ struct buffer_page *first_page, *bpage;
+ int id = 0;
+
+ page_ids[id] = (unsigned long)cpu_buffer->reader_page->page;
+ cpu_buffer->reader_page->id = id++;
+
+ first_page = bpage = rb_set_head_page(cpu_buffer);
+ do {
+ if (id >= nr_data_pages) {
+ WARN_ON(1);
+ break;
+ }
+
+ page_ids[id] = (unsigned long)bpage->page;
+ bpage->id = id;
+
+ rb_inc_page(&bpage);
+ id++;
+ } while (bpage != first_page);
+
+ /* install page ID to kern VA translation */
+ cpu_buffer->page_ids = page_ids;
+
+ meta->meta_page_size = PAGE_SIZE;
+ meta->nr_data_pages = nr_data_pages;
+ meta->reader_page.id = cpu_buffer->reader_page->id;
+ rb_reset_meta_page(cpu_buffer);
+}
+
+static inline struct ring_buffer_per_cpu *
+rb_get_mapped_buffer(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return ERR_PTR(-EINVAL);
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ mutex_lock(&cpu_buffer->mapping_lock);
+
+ if (!cpu_buffer->mapped) {
+ mutex_unlock(&cpu_buffer->mapping_lock);
+ return ERR_PTR(-ENODEV);
+ }
+
+ return cpu_buffer;
+}
+
+static inline void rb_put_mapped_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ mutex_unlock(&cpu_buffer->mapping_lock);
+}
+
+int ring_buffer_map(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long flags, *page_ids;
+ int err = 0;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -EINVAL;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ mutex_lock(&cpu_buffer->mapping_lock);
+
+ if (cpu_buffer->mapped) {
+ WRITE_ONCE(cpu_buffer->mapped, cpu_buffer->mapped + 1);
+ goto unlock;
+ }
+
+ /* prevent another thread from changing buffer sizes */
+ mutex_lock(&buffer->mutex);
+ atomic_inc(&cpu_buffer->resize_disabled);
+ mutex_unlock(&buffer->mutex);
+
+ err = rb_alloc_meta_page(cpu_buffer);
+ if (err) {
+ atomic_dec(&cpu_buffer->resize_disabled);
+ goto unlock;
+ }
+
+ /* page_ids include the reader page while nr_pages does not */
+ page_ids = kzalloc(sizeof(*page_ids) * (cpu_buffer->nr_pages + 1),
+ GFP_KERNEL);
+ if (!page_ids) {
+ rb_free_meta_page(cpu_buffer);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ /*
+ * Lock all readers to block any page swap until the page IDs are
+ * assigned.
+ */
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ rb_setup_ids_meta_page(cpu_buffer, page_ids);
+ /*
+ * Ensure the writer will observe the meta-page before
+ * cpu_buffer->mapped.
+ */
+ smp_wmb();
+ WRITE_ONCE(cpu_buffer->mapped, 1);
+
+ /* Init meta_page values unless the writer did it already */
+ cmpxchg(&cpu_buffer->meta_page->entries, 0,
+ local_read(&cpu_buffer->entries));
+ cmpxchg(&cpu_buffer->meta_page->overrun, 0,
+ local_read(&cpu_buffer->overrun));
+ cmpxchg(&cpu_buffer->meta_page->pages_touched, 0,
+ local_read(&cpu_buffer->pages_touched));
+ cmpxchg(&cpu_buffer->meta_page->pages_lost, 0,
+ local_read(&cpu_buffer->pages_lost));
+
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+unlock:
+ mutex_unlock(&cpu_buffer->mapping_lock);
+
+ return err;
+}
+
+int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int err = 0;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -EINVAL;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ mutex_lock(&cpu_buffer->mapping_lock);
+
+ if (!cpu_buffer->mapped) {
+ err = -ENODEV;
+ goto unlock;
+ }
+
+ WRITE_ONCE(cpu_buffer->mapped, cpu_buffer->mapped - 1);
+ if (!cpu_buffer->mapped) {
+ /* Wait for the writer and readers to observe !mapped */
+ synchronize_rcu();
+
+ rb_free_page_ids(cpu_buffer);
+ rb_free_meta_page(cpu_buffer);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ }
+
+unlock:
+ mutex_unlock(&cpu_buffer->mapping_lock);
+
+ return err;
+}
+
+/*
+ * +--------------+
+ * | meta page | pgoff=0
+ * +--------------+
+ * | data page1 | page_ids=0
+ * +--------------+
+ * | data page2 | page_ids=1
+ * ...
+ */
+struct page *ring_buffer_map_fault(struct trace_buffer *buffer, int cpu,
+ unsigned long pgoff)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+
+ if (!pgoff)
+ return virt_to_page((void *)cpu_buffer->meta_page);
+
+ pgoff--;
+ if (pgoff > cpu_buffer->nr_pages)
+ return NULL;
+
+ return virt_to_page(cpu_buffer->page_ids[pgoff]);
+}
+
+int ring_buffer_map_get_reader_page(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long reader_size, flags;
+
+ cpu_buffer = rb_get_mapped_buffer(buffer, cpu);
+ if (IS_ERR(cpu_buffer))
+ return (int)PTR_ERR(cpu_buffer);
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+consume:
+ if (rb_per_cpu_empty(cpu_buffer))
+ goto out;
+ reader_size = rb_page_size(cpu_buffer->reader_page);
+ if (cpu_buffer->reader_page->read < reader_size) {
+ while (cpu_buffer->reader_page->read < reader_size)
+ rb_advance_reader(cpu_buffer);
+ goto out;
+ }
+
+ if (WARN_ON(!rb_get_reader_page(cpu_buffer)))
+ goto out;
+
+ goto consume;
+out:
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ rb_put_mapped_buffer(cpu_buffer);
+
+ return 0;
+}
+
/*
* We only allocate new buffers, never free them if the CPU goes down.
* If we were to free the buffer, then the user would lose any trace that was in
--
2.41.0.162.gfafddb0af9-goog
Hello,
kernel test robot noticed a 25.5% improvement of stress-ng.kill.ops_per_sec on:
commit: dab8a2e9ddebfc8629b0b7f527206c91d52b160e ("[PATCH v4 1/2] ring-buffer: Introducing ring-buffer mapping functions")
url: https://github.com/intel-lab-lkp/linux/commits/Vincent-Donnefort/ring-buffer-Introducing-ring-buffer-mapping-functions/20230613-163710
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git fb054096aea0576f0c0a61c598e5e9676443ee86
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [PATCH v4 1/2] ring-buffer: Introducing ring-buffer mapping functions
testcase: stress-ng
test machine: 64 threads 2 sockets Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz (Ice Lake) with 256G memory
parameters:
nr_threads: 100%
disk: 1HDD
testtime: 60s
class: interrupt
test: kill
cpufreq_governor: performance
Details are as below:
-------------------------------------------------------------------------------------------------->
To reproduce:
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file
# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.
=========================================================================================
class/compiler/cpufreq_governor/disk/kconfig/nr_threads/rootfs/tbox_group/test/testcase/testtime:
interrupt/gcc-12/performance/1HDD/x86_64-rhel-8.3/100%/debian-11.1-x86_64-20220510.cgz/lkp-icl-2sp8/kill/stress-ng/60s
commit:
fb054096ae ("Merge tag 'mm-hotfixes-stable-2023-06-12-12-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm")
dab8a2e9dd ("ring-buffer: Introducing ring-buffer mapping functions")
fb054096aea0576f dab8a2e9ddebfc8629b0b7f5272
---------------- ---------------------------
%stddev %change %stddev
\ | \
0.01 +100.0% 0.02 turbostat.IPC
318.00 ? 98% +475.1% 1828 ?117% vmstat.memory.buff
85186 +23.8% 105494 vmstat.system.cs
0.72 +0.1 0.78 ? 2% mpstat.cpu.all.irq%
0.02 ? 6% +0.0 0.03 ? 3% mpstat.cpu.all.soft%
0.47 +0.3 0.81 ? 10% mpstat.cpu.all.usr%
167.67 ? 10% -34.2% 110.33 ? 7% perf-c2c.DRAM.local
1709 ? 3% -78.2% 372.33 ? 5% perf-c2c.DRAM.remote
3027 ? 4% -20.6% 2402 ? 6% perf-c2c.HITM.local
540.00 ? 5% -67.7% 174.67 ? 12% perf-c2c.HITM.remote
293385 +10.9% 325265 meminfo.Active
474.67 ? 59% +308.4% 1938 ?111% meminfo.Active(file)
318.00 ? 98% +473.6% 1824 ?118% meminfo.Buffers
99203 ? 2% +22.1% 121162 ? 3% meminfo.Mapped
354897 +13.0% 401038 meminfo.Shmem
737.88 +6.0% 782.52 stress-ng.kill.kill_calls_per_sec
913328 +25.5% 1146265 stress-ng.kill.ops
15221 +25.5% 19103 stress-ng.kill.ops_per_sec
2741307 +24.9% 3422867 stress-ng.time.involuntary_context_switches
2719695 +25.1% 3402015 stress-ng.time.voluntary_context_switches
72923 +10.5% 80615 proc-vmstat.nr_active_anon
118.50 ? 59% +308.9% 484.50 ?111% proc-vmstat.nr_active_file
89727 +1.8% 91366 proc-vmstat.nr_anon_pages
772985 +1.6% 785072 proc-vmstat.nr_file_pages
105184 +5.4% 110891 proc-vmstat.nr_inactive_anon
24859 ? 3% +22.5% 30450 ? 2% proc-vmstat.nr_mapped
88505 +13.3% 100250 proc-vmstat.nr_shmem
72923 +10.5% 80615 proc-vmstat.nr_zone_active_anon
118.50 ? 59% +308.9% 484.50 ?111% proc-vmstat.nr_zone_active_file
105184 +5.4% 110891 proc-vmstat.nr_zone_inactive_anon
18521 ? 15% +45.3% 26903 ? 8% proc-vmstat.numa_hint_faults_local
519320 +3.3% 536424 proc-vmstat.numa_hit
453047 +3.8% 470130 ? 2% proc-vmstat.numa_local
559942 +2.6% 574521 proc-vmstat.pgalloc_normal
1730277 -10.7% 1544435 sched_debug.cfs_rq:/.min_vruntime.min
25977 ? 8% +82.0% 47289 ? 3% sched_debug.cfs_rq:/.min_vruntime.stddev
46.15 ? 41% +63.1% 75.28 ? 24% sched_debug.cfs_rq:/.removed.load_avg.avg
142.13 ? 18% +25.5% 178.42 ? 10% sched_debug.cfs_rq:/.removed.load_avg.stddev
15.30 ? 40% +73.1% 26.49 ? 19% sched_debug.cfs_rq:/.removed.runnable_avg.avg
51.68 ? 25% +34.2% 69.36 ? 7% sched_debug.cfs_rq:/.removed.runnable_avg.stddev
15.30 ? 40% +73.1% 26.49 ? 19% sched_debug.cfs_rq:/.removed.util_avg.avg
51.68 ? 25% +34.2% 69.36 ? 7% sched_debug.cfs_rq:/.removed.util_avg.stddev
-163513 +101.1% -328771 sched_debug.cfs_rq:/.spread0.min
25962 ? 8% +82.2% 47315 ? 3% sched_debug.cfs_rq:/.spread0.stddev
185.25 ? 6% -30.6% 128.58 ? 46% sched_debug.cfs_rq:/.util_est_enqueued.min
91.74 ? 12% +27.1% 116.58 ? 17% sched_debug.cfs_rq:/.util_est_enqueued.stddev
47444 +21.0% 57414 sched_debug.cpu.nr_switches.avg
34565 ? 2% -56.3% 15121 ? 11% sched_debug.cpu.nr_switches.min
26970 ? 2% +64.1% 44271 ? 3% sched_debug.cpu.nr_switches.stddev
13.70 -2.0% 13.42 perf-stat.i.MPKI
1.579e+09 +18.3% 1.868e+09 perf-stat.i.branch-instructions
8703552 +7.4% 9347515 perf-stat.i.branch-misses
11.92 -8.9 2.97 ? 5% perf-stat.i.cache-miss-rate%
13454425 -72.4% 3712356 ? 3% perf-stat.i.cache-misses
1.116e+08 +19.3% 1.332e+08 perf-stat.i.cache-references
88572 +23.9% 109756 perf-stat.i.context-switches
27.02 -19.0% 21.88 perf-stat.i.cpi
15970 +309.6% 65417 ? 2% perf-stat.i.cycles-between-cache-misses
2.318e+09 +19.5% 2.77e+09 perf-stat.i.dTLB-loads
1.199e+09 +21.0% 1.452e+09 perf-stat.i.dTLB-stores
8.467e+09 +18.9% 1.006e+10 perf-stat.i.instructions
146.35 -63.4% 53.57 ? 14% perf-stat.i.metric.K/sec
81.18 +19.6% 97.05 perf-stat.i.metric.M/sec
86.90 -7.0 79.92 ? 2% perf-stat.i.node-load-miss-rate%
2738509 -77.0% 630505 ? 3% perf-stat.i.node-load-misses
407911 ? 5% -56.2% 178562 ? 14% perf-stat.i.node-loads
89.44 -36.9 52.51 perf-stat.i.node-store-miss-rate%
4392752 -92.0% 351197 ? 3% perf-stat.i.node-store-misses
0.53 -0.1 0.47 perf-stat.overall.branch-miss-rate%
12.05 -9.3 2.76 ? 2% perf-stat.overall.cache-miss-rate%
25.96 -16.1% 21.79 perf-stat.overall.cpi
16218 +264.0% 59031 ? 3% perf-stat.overall.cycles-between-cache-misses
0.00 ? 7% -0.0 0.00 ? 8% perf-stat.overall.dTLB-store-miss-rate%
0.04 +19.1% 0.05 perf-stat.overall.ipc
86.90 -9.4 77.51 ? 2% perf-stat.overall.node-load-miss-rate%
90.81 -48.4 42.45 ? 2% perf-stat.overall.node-store-miss-rate%
1.546e+09 +18.1% 1.826e+09 perf-stat.ps.branch-instructions
8207103 +5.4% 8650221 perf-stat.ps.branch-misses
13275967 -72.6% 3639102 ? 3% perf-stat.ps.cache-misses
1.102e+08 +19.8% 1.319e+08 perf-stat.ps.cache-references
87466 +24.2% 108660 perf-stat.ps.context-switches
2.275e+09 +19.5% 2.719e+09 perf-stat.ps.dTLB-loads
1.18e+09 +21.1% 1.43e+09 perf-stat.ps.dTLB-stores
8.294e+09 +18.7% 9.849e+09 perf-stat.ps.instructions
2703084 -77.1% 618728 ? 3% perf-stat.ps.node-load-misses
407596 ? 5% -55.8% 180316 ? 13% perf-stat.ps.node-loads
4340341 -92.0% 347078 ? 3% perf-stat.ps.node-store-misses
5.243e+11 +19.0% 6.239e+11 perf-stat.total.instructions
0.02 ? 19% +158.0% 0.06 ? 50% perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
0.01 ? 39% +61.1% 0.02 ? 20% perf-sched.sch_delay.avg.ms.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
0.34 ?183% +373.4% 1.62 ? 73% perf-sched.sch_delay.avg.ms.io_schedule.bit_wait_io.__wait_on_bit.out_of_line_wait_on_bit
1.92 ? 8% +102.3% 3.88 ? 13% perf-sched.sch_delay.avg.ms.irq_thread.kthread.ret_from_fork
0.94 ?148% +253.3% 3.31 ? 76% perf-sched.sch_delay.avg.ms.schedule_timeout.ext4_lazyinit_thread.part.0.kthread
0.63 ?167% +440.9% 3.42 ? 59% perf-sched.sch_delay.avg.ms.schedule_timeout.io_schedule_timeout.__wait_for_common.submit_bio_wait
1.92 ? 9% +27.4% 2.44 ? 15% perf-sched.sch_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
0.90 ? 6% -20.1% 0.72 ? 6% perf-sched.sch_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork
2.90 ? 11% +220.7% 9.30 ? 18% perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
6.26 ? 30% +479.0% 36.23 ? 65% perf-sched.sch_delay.max.ms.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
0.68 ?183% +372.0% 3.23 ? 73% perf-sched.sch_delay.max.ms.io_schedule.bit_wait_io.__wait_on_bit.out_of_line_wait_on_bit
3.96 ? 14% +135.0% 9.32 ? 13% perf-sched.sch_delay.max.ms.irq_thread.kthread.ret_from_fork
0.94 ?148% +253.3% 3.31 ? 76% perf-sched.sch_delay.max.ms.schedule_timeout.ext4_lazyinit_thread.part.0.kthread
0.63 ?167% +440.9% 3.42 ? 59% perf-sched.sch_delay.max.ms.schedule_timeout.io_schedule_timeout.__wait_for_common.submit_bio_wait
4.02 ? 7% +89.6% 7.63 ? 12% perf-sched.sch_delay.max.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
5.09 ? 13% +101.8% 10.27 ? 8% perf-sched.sch_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork
12.96 ? 7% +22.8% 15.91 ? 8% perf-sched.sch_delay.max.ms.wait_for_partner.fifo_open.do_dentry_open.do_open
5.38 ? 13% +89.3% 10.19 ? 12% perf-sched.sch_delay.max.ms.worker_thread.kthread.ret_from_fork
2.22 ? 2% -18.6% 1.80 ? 3% perf-sched.total_wait_and_delay.average.ms
462455 +23.1% 569289 perf-sched.total_wait_and_delay.count.ms
2.20 ? 2% -18.7% 1.79 ? 3% perf-sched.total_wait_time.average.ms
13.15 ? 7% -10.5% 11.77 ? 3% perf-sched.wait_and_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
118.49 ? 34% -77.9% 26.13 ? 32% perf-sched.wait_and_delay.avg.ms.__cond_resched.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.vfs_write
4.14 -20.8% 3.28 perf-sched.wait_and_delay.avg.ms.do_signal_stop.get_signal.arch_do_signal_or_restart.exit_to_user_mode_loop
0.02 ? 5% +46.7% 0.03 ? 11% perf-sched.wait_and_delay.avg.ms.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
18.24 ?142% +977.7% 196.58 ?124% perf-sched.wait_and_delay.avg.ms.kjournald2.kthread.ret_from_fork
3.24 ?144% +244.9% 11.18 ? 53% perf-sched.wait_and_delay.avg.ms.schedule_timeout.ext4_lazyinit_thread.part.0.kthread
7.83 ? 58% +153.2% 19.83 ? 34% perf-sched.wait_and_delay.count.__cond_resched.__alloc_pages.__folio_alloc.vma_alloc_folio.shmem_alloc_folio
352.50 ? 8% -9.2% 320.00 perf-sched.wait_and_delay.count.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
25.50 ? 11% -28.8% 18.17 ? 20% perf-sched.wait_and_delay.count.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary
20.33 ? 16% +263.1% 73.83 ? 31% perf-sched.wait_and_delay.count.__cond_resched.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.vfs_write
151847 +23.8% 187914 perf-sched.wait_and_delay.count.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
75617 +23.0% 93028 perf-sched.wait_and_delay.count.do_signal_stop.get_signal.arch_do_signal_or_restart.exit_to_user_mode_loop
230482 +23.1% 283813 perf-sched.wait_and_delay.count.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
1246 ? 20% -55.5% 555.15 ? 32% perf-sched.wait_and_delay.max.ms.__cond_resched.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.vfs_write
18.24 ?142% +977.7% 196.58 ?124% perf-sched.wait_and_delay.max.ms.kjournald2.kthread.ret_from_fork
3.24 ?144% +244.9% 11.18 ? 53% perf-sched.wait_and_delay.max.ms.schedule_timeout.ext4_lazyinit_thread.part.0.kthread
12.96 ? 7% +22.8% 15.91 ? 8% perf-sched.wait_and_delay.max.ms.wait_for_partner.fifo_open.do_dentry_open.do_open
13.12 ? 7% -10.8% 11.70 ? 4% perf-sched.wait_time.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
0.01 ? 29% +656.5% 0.09 ?172% perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.link_path_walk.part
0.01 ? 9% +5204.7% 0.38 ?152% perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat
118.49 ? 34% -77.9% 26.13 ? 32% perf-sched.wait_time.avg.ms.__cond_resched.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.vfs_write
4.13 -20.9% 3.27 perf-sched.wait_time.avg.ms.do_signal_stop.get_signal.arch_do_signal_or_restart.exit_to_user_mode_loop
0.02 ? 3% +49.1% 0.03 ? 12% perf-sched.wait_time.avg.ms.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
0.01 ?186% +479.6% 0.05 ? 39% perf-sched.wait_time.avg.ms.io_schedule.bit_wait_io.__wait_on_bit.out_of_line_wait_on_bit
16.37 ?142% +1077.5% 192.72 ?127% perf-sched.wait_time.avg.ms.kjournald2.kthread.ret_from_fork
2.30 ?142% +241.5% 7.87 ? 43% perf-sched.wait_time.avg.ms.schedule_timeout.ext4_lazyinit_thread.part.0.kthread
0.03 ? 53% +3495.7% 0.97 ?200% perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.link_path_walk.part
0.01 ? 40% +29892.9% 3.50 ?143% perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat
1246 ? 20% -55.5% 555.15 ? 32% perf-sched.wait_time.max.ms.__cond_resched.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.vfs_write
24.58 ? 13% +45.8% 35.84 ? 21% perf-sched.wait_time.max.ms.do_signal_stop.get_signal.arch_do_signal_or_restart.exit_to_user_mode_loop
0.02 ?186% +475.9% 0.10 ? 39% perf-sched.wait_time.max.ms.io_schedule.bit_wait_io.__wait_on_bit.out_of_line_wait_on_bit
16.37 ?142% +1077.5% 192.72 ?127% perf-sched.wait_time.max.ms.kjournald2.kthread.ret_from_fork
2.30 ?142% +241.5% 7.87 ? 43% perf-sched.wait_time.max.ms.schedule_timeout.ext4_lazyinit_thread.part.0.kthread
9.10 ? 2% -5.5 3.64 ? 2% perf-profile.calltrace.cycles-pp.aa_may_signal.apparmor_task_kill.security_task_kill.kill_something_info.__x64_sys_kill
94.25 -2.4 91.90 perf-profile.calltrace.cycles-pp.apparmor_task_kill.security_task_kill.kill_something_info.__x64_sys_kill.do_syscall_64
94.39 -2.0 92.42 perf-profile.calltrace.cycles-pp.security_task_kill.kill_something_info.__x64_sys_kill.do_syscall_64.entry_SYSCALL_64_after_hwframe
98.29 -1.0 97.34 perf-profile.calltrace.cycles-pp.kill_something_info.__x64_sys_kill.do_syscall_64.entry_SYSCALL_64_after_hwframe.kill
98.33 -0.9 97.40 perf-profile.calltrace.cycles-pp.__x64_sys_kill.do_syscall_64.entry_SYSCALL_64_after_hwframe.kill
98.46 -0.9 97.59 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.kill
98.46 -0.9 97.60 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.kill
98.49 -0.9 97.63 perf-profile.calltrace.cycles-pp.kill
0.66 +0.2 0.83 ? 2% perf-profile.calltrace.cycles-pp.apparmor_task_kill.security_task_kill.kill_pid_info.kill_something_info.__x64_sys_kill
0.67 +0.2 0.84 ? 2% perf-profile.calltrace.cycles-pp.security_task_kill.kill_pid_info.kill_something_info.__x64_sys_kill.do_syscall_64
0.72 +0.2 0.92 perf-profile.calltrace.cycles-pp.profile_signal_perm.aa_may_signal.apparmor_task_kill.security_task_kill.kill_something_info
0.77 +0.2 0.98 perf-profile.calltrace.cycles-pp.kill_pid_info.kill_something_info.__x64_sys_kill.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.56 ? 3% +0.3 0.81 ? 3% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.pause
0.56 ? 3% +0.3 0.82 ? 3% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.pause
0.57 ? 3% +0.3 0.83 ? 3% perf-profile.calltrace.cycles-pp.pause
0.00 +0.6 0.56 ? 3% perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.apparmor_task_kill.security_task_kill.kill_something_info.__x64_sys_kill
0.08 ?223% +0.6 0.70 ? 4% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.pause
0.08 ?223% +0.6 0.70 ? 4% perf-profile.calltrace.cycles-pp.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.pause
0.00 +0.6 0.65 ? 3% perf-profile.calltrace.cycles-pp.do_filp_open.do_sys_openat2.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +0.6 0.65 ? 3% perf-profile.calltrace.cycles-pp.path_openat.do_filp_open.do_sys_openat2.__x64_sys_openat.do_syscall_64
2.44 ? 3% +0.6 3.09 ? 3% perf-profile.calltrace.cycles-pp.queued_read_lock_slowpath.kill_something_info.__x64_sys_kill.do_syscall_64.entry_SYSCALL_64_after_hwframe
2.43 ? 3% +0.7 3.09 ? 3% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath.queued_read_lock_slowpath.kill_something_info.__x64_sys_kill.do_syscall_64
0.00 +0.7 0.67 ? 4% perf-profile.calltrace.cycles-pp.get_signal.arch_do_signal_or_restart.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode
0.00 +0.7 0.68 ? 3% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.open64
0.00 +0.7 0.68 ? 3% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64
0.00 +0.7 0.68 ? 3% perf-profile.calltrace.cycles-pp.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64
0.00 +0.7 0.68 ? 3% perf-profile.calltrace.cycles-pp.do_sys_openat2.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64
0.00 +0.7 0.68 ? 3% perf-profile.calltrace.cycles-pp.open64
0.00 +0.7 0.69 ? 4% perf-profile.calltrace.cycles-pp.arch_do_signal_or_restart.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
0.00 +0.7 0.70 ? 5% perf-profile.calltrace.cycles-pp.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
38.83 +3.0 41.81 perf-profile.calltrace.cycles-pp.aa_get_task_label.apparmor_task_kill.security_task_kill.kill_something_info.__x64_sys_kill
9.20 ? 2% -5.5 3.72 ? 2% perf-profile.children.cycles-pp.aa_may_signal
94.97 -1.8 93.15 perf-profile.children.cycles-pp.apparmor_task_kill
95.08 -1.8 93.28 perf-profile.children.cycles-pp.security_task_kill
98.32 -0.9 97.38 perf-profile.children.cycles-pp.kill_something_info
98.33 -0.9 97.40 perf-profile.children.cycles-pp.__x64_sys_kill
98.50 -0.9 97.64 perf-profile.children.cycles-pp.kill
99.90 -0.1 99.81 perf-profile.children.cycles-pp.do_syscall_64
99.91 -0.1 99.82 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
0.07 -0.0 0.06 ? 6% perf-profile.children.cycles-pp.exit_notify
0.06 ? 9% +0.0 0.07 perf-profile.children.cycles-pp.audit_signal_info
0.08 ? 5% +0.0 0.10 perf-profile.children.cycles-pp.ksys_read
0.06 ? 7% +0.0 0.08 ? 4% perf-profile.children.cycles-pp.audit_signal_info_syscall
0.09 ? 5% +0.0 0.11 ? 4% perf-profile.children.cycles-pp.security_file_open
0.09 ? 5% +0.0 0.11 ? 4% perf-profile.children.cycles-pp.apparmor_file_open
0.05 +0.0 0.08 ? 6% perf-profile.children.cycles-pp.apparmor_file_alloc_security
0.05 +0.0 0.08 perf-profile.children.cycles-pp.security_file_alloc
0.06 ? 7% +0.0 0.10 ? 4% perf-profile.children.cycles-pp.__x64_sys_pause
0.06 ? 7% +0.0 0.10 ? 5% perf-profile.children.cycles-pp.complete_signal
0.11 ? 4% +0.0 0.14 perf-profile.children.cycles-pp.__task_pid_nr_ns
0.06 +0.0 0.10 ? 8% perf-profile.children.cycles-pp.__d_lookup
0.06 ? 6% +0.0 0.10 ? 8% perf-profile.children.cycles-pp.open_last_lookups
0.07 +0.0 0.11 ? 4% perf-profile.children.cycles-pp.alloc_empty_file
0.05 ? 7% +0.0 0.09 ? 4% perf-profile.children.cycles-pp.__fput
0.07 ? 7% +0.0 0.10 ? 4% perf-profile.children.cycles-pp.__alloc_file
0.07 ? 5% +0.0 0.11 ? 5% perf-profile.children.cycles-pp.try_to_wake_up
0.02 ? 99% +0.0 0.06 ? 7% perf-profile.children.cycles-pp._raw_spin_lock_irqsave
0.07 +0.0 0.11 ? 3% perf-profile.children.cycles-pp.task_work_run
0.13 ? 5% +0.0 0.17 ? 2% perf-profile.children.cycles-pp.__kill_pgrp_info
0.09 ? 5% +0.0 0.14 ? 4% perf-profile.children.cycles-pp.do_send_sig_info
0.09 ? 4% +0.1 0.14 ? 2% perf-profile.children.cycles-pp.__send_signal_locked
0.17 ? 2% +0.1 0.22 ? 5% perf-profile.children.cycles-pp.do_dentry_open
0.00 +0.1 0.05 ? 7% perf-profile.children.cycles-pp.perf_mux_hrtimer_handler
0.09 +0.1 0.14 ? 6% perf-profile.children.cycles-pp.lookup_fast
0.00 +0.1 0.06 ? 9% perf-profile.children.cycles-pp.lockref_get
0.00 +0.1 0.06 ? 9% perf-profile.children.cycles-pp.lockref_put_or_lock
0.00 +0.1 0.06 ? 9% perf-profile.children.cycles-pp.ttwu_do_activate
0.08 ? 4% +0.1 0.14 ? 5% perf-profile.children.cycles-pp.inode_permission
0.08 ? 4% +0.1 0.14 ? 5% perf-profile.children.cycles-pp.proc_sys_permission
0.07 +0.1 0.13 ? 5% perf-profile.children.cycles-pp.switch_mm_irqs_off
0.07 ? 6% +0.1 0.13 ? 5% perf-profile.children.cycles-pp._raw_spin_lock_irq
0.41 +0.1 0.47 perf-profile.children.cycles-pp.check_kill_permission
0.09 +0.1 0.15 ? 2% perf-profile.children.cycles-pp.__close
0.00 +0.1 0.06 ? 6% perf-profile.children.cycles-pp.security_file_free
0.00 +0.1 0.06 ? 6% perf-profile.children.cycles-pp.apparmor_file_free_security
0.00 +0.1 0.06 ? 6% perf-profile.children.cycles-pp.walk_component
0.18 ? 2% +0.1 0.24 ? 4% perf-profile.children.cycles-pp.do_open
0.00 +0.1 0.07 ? 19% perf-profile.children.cycles-pp.shmem_write_begin
0.20 +0.1 0.27 ? 4% perf-profile.children.cycles-pp.__hrtimer_run_queues
0.22 ? 8% +0.1 0.29 ? 6% perf-profile.children.cycles-pp.do_signal_stop
0.00 +0.1 0.07 ? 18% perf-profile.children.cycles-pp.shmem_get_folio_gfp
0.00 +0.1 0.08 ? 6% perf-profile.children.cycles-pp.dput
0.11 ? 3% +0.1 0.19 ? 3% perf-profile.children.cycles-pp.link_path_walk
0.25 +0.1 0.32 ? 8% perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt
0.24 +0.1 0.32 ? 8% perf-profile.children.cycles-pp.hrtimer_interrupt
0.28 +0.1 0.38 ? 7% perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
0.20 ? 2% +0.1 0.31 ? 3% perf-profile.children.cycles-pp.__schedule
0.20 ? 4% +0.1 0.31 ? 3% perf-profile.children.cycles-pp.schedule
0.00 +0.1 0.13 ? 17% perf-profile.children.cycles-pp.generic_perform_write
0.00 +0.1 0.13 ? 18% perf-profile.children.cycles-pp.__generic_file_write_iter
0.00 +0.1 0.14 ? 17% perf-profile.children.cycles-pp.generic_file_write_iter
0.00 +0.2 0.16 ? 18% perf-profile.children.cycles-pp.vfs_write
0.31 ? 3% +0.2 0.48 ? 3% perf-profile.children.cycles-pp._raw_spin_lock
0.00 +0.2 0.20 ? 20% perf-profile.children.cycles-pp.ksys_write
0.00 +0.2 0.20 ? 19% perf-profile.children.cycles-pp.__libc_write
0.46 ? 3% +0.2 0.67 ? 4% perf-profile.children.cycles-pp.get_signal
0.74 +0.2 0.95 perf-profile.children.cycles-pp.profile_signal_perm
0.00 +0.2 0.21 ? 19% perf-profile.children.cycles-pp.record__pushfn
0.02 ? 99% +0.2 0.24 ? 19% perf-profile.children.cycles-pp.__libc_start_main
0.02 ? 99% +0.2 0.24 ? 19% perf-profile.children.cycles-pp.main
0.02 ? 99% +0.2 0.24 ? 19% perf-profile.children.cycles-pp.run_builtin
0.00 +0.2 0.21 ? 20% perf-profile.children.cycles-pp.writen
0.00 +0.2 0.21 ? 20% perf-profile.children.cycles-pp.perf_mmap__push
0.77 +0.2 0.98 perf-profile.children.cycles-pp.kill_pid_info
0.00 +0.2 0.22 ? 20% perf-profile.children.cycles-pp.record__mmap_read_evlist
0.01 ?223% +0.2 0.23 ? 20% perf-profile.children.cycles-pp.cmd_record
0.01 ?223% +0.2 0.23 ? 20% perf-profile.children.cycles-pp.__cmd_record
0.50 ? 2% +0.2 0.72 ? 4% perf-profile.children.cycles-pp.arch_do_signal_or_restart
0.43 +0.2 0.66 ? 3% perf-profile.children.cycles-pp.path_openat
0.44 +0.2 0.66 ? 3% perf-profile.children.cycles-pp.do_filp_open
0.45 +0.2 0.69 ? 3% perf-profile.children.cycles-pp.open64
0.46 +0.2 0.70 ? 3% perf-profile.children.cycles-pp.__x64_sys_openat
0.46 +0.2 0.70 ? 3% perf-profile.children.cycles-pp.do_sys_openat2
0.58 ? 3% +0.3 0.83 ? 3% perf-profile.children.cycles-pp.pause
0.67 ? 2% +0.3 1.00 ? 3% perf-profile.children.cycles-pp.exit_to_user_mode_loop
0.70 ? 2% +0.3 1.03 ? 3% perf-profile.children.cycles-pp.syscall_exit_to_user_mode
0.70 ? 2% +0.3 1.03 ? 3% perf-profile.children.cycles-pp.exit_to_user_mode_prepare
2.74 ? 3% +0.7 3.49 ? 3% perf-profile.children.cycles-pp.queued_read_lock_slowpath
0.32 +0.8 1.13 ? 3% perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
2.98 ? 3% +0.8 3.82 ? 3% perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
39.13 +3.5 42.64 perf-profile.children.cycles-pp.aa_get_task_label
8.44 ? 2% -5.7 2.78 ? 2% perf-profile.self.cycles-pp.aa_may_signal
0.18 ? 2% -0.0 0.17 ? 2% perf-profile.self.cycles-pp.apparmor_capable
0.10 ? 4% +0.0 0.12 ? 3% perf-profile.self.cycles-pp.security_task_kill
0.05 ? 7% +0.0 0.07 perf-profile.self.cycles-pp.audit_signal_info_syscall
0.09 ? 7% +0.0 0.11 ? 6% perf-profile.self.cycles-pp.apparmor_file_open
0.09 ? 5% +0.0 0.11 ? 4% perf-profile.self.cycles-pp.__task_pid_nr_ns
0.05 +0.0 0.08 ? 6% perf-profile.self.cycles-pp.apparmor_file_alloc_security
0.09 ? 5% +0.0 0.12 ? 4% perf-profile.self.cycles-pp.kill_something_info
0.11 ? 4% +0.0 0.15 ? 6% perf-profile.self.cycles-pp.check_kill_permission
0.02 ? 99% +0.0 0.06 ? 6% perf-profile.self.cycles-pp._raw_spin_lock_irqsave
0.12 ? 4% +0.0 0.16 ? 3% perf-profile.self.cycles-pp.__kill_pgrp_info
0.01 ?223% +0.0 0.06 ? 6% perf-profile.self.cycles-pp.audit_signal_info
0.05 ? 7% +0.1 0.11 ? 3% perf-profile.self.cycles-pp._raw_spin_lock_irq
0.07 +0.1 0.13 ? 5% perf-profile.self.cycles-pp.switch_mm_irqs_off
0.00 +0.1 0.06 ? 6% perf-profile.self.cycles-pp.apparmor_file_free_security
0.10 ? 4% +0.1 0.18 ? 2% perf-profile.self.cycles-pp._raw_spin_lock
0.72 +0.2 0.92 perf-profile.self.cycles-pp.profile_signal_perm
2.97 ? 3% +0.8 3.80 ? 3% perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath
39.00 +3.4 42.44 perf-profile.self.cycles-pp.aa_get_task_label
Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki