Sometimes we want to know the accurate number of samples even if some
of them are lost. Currently PERF_RECORD_LOST is generated for a ring-buffer
which might be shared with other events, so it's hard to know the per-event
lost count.
Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from
userspace.
Original-patch-by: Jiri Olsa <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
include/linux/perf_event.h | 2 ++
include/uapi/linux/perf_event.h | 5 ++++-
kernel/events/core.c | 22 +++++++++++++++++++---
kernel/events/ring_buffer.c | 5 ++++-
4 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5a6a2f069ed..189a471fba42 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -756,6 +756,8 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
+ atomic64_t lost_samples;
+
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bf8143505c49..f72008949ff0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -299,6 +299,7 @@ enum {
* { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
* { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
* { u64 id; } && PERF_FORMAT_ID
+ * { u64 lost; } && PERF_FORMAT_LOST
* } && !PERF_FORMAT_GROUP
*
* { u64 nr;
@@ -306,6 +307,7 @@ enum {
* { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
* { u64 value;
* { u64 id; } && PERF_FORMAT_ID
+ * { u64 lost; } && PERF_FORMAT_LOST
* } cntr[nr];
* } && PERF_FORMAT_GROUP
* };
@@ -315,8 +317,9 @@ enum perf_event_read_format {
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
PERF_FORMAT_GROUP = 1U << 3,
+ PERF_FORMAT_LOST = 1U << 4,
- PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+ PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0e125ae2fa92..8708325ee4a2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1841,6 +1841,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
if (event->attr.read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
+ if (event->attr.read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
if (event->attr.read_format & PERF_FORMAT_GROUP) {
nr += nr_siblings;
size += sizeof(u64);
@@ -5255,11 +5258,15 @@ static int __perf_read_group_add(struct perf_event *leader,
values[n++] += perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
for_each_sibling_event(sub, leader) {
values[n++] += perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
}
raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -5316,7 +5323,7 @@ static int perf_read_one(struct perf_event *event,
u64 read_format, char __user *buf)
{
u64 enabled, running;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = __perf_event_read_value(event, &enabled, &running);
@@ -5326,6 +5333,8 @@ static int perf_read_one(struct perf_event *event,
values[n++] = running;
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
if (copy_to_user(buf, values, n * sizeof(u64)))
return -EFAULT;
@@ -5664,6 +5673,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
return perf_event_modify_attr(event, &new_attr);
}
+
default:
return -ENOTTY;
}
@@ -6835,7 +6845,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
u64 enabled, u64 running)
{
u64 read_format = event->attr.read_format;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = perf_event_count(event);
@@ -6849,6 +6859,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
}
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -6859,7 +6871,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
- u64 values[5];
+ u64 values[6];
int n = 0;
values[n++] = 1 + leader->nr_siblings;
@@ -6877,6 +6889,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
@@ -6890,6 +6904,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 52868716ec35..727ca8f4caad 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -172,8 +172,10 @@ __perf_output_begin(struct perf_output_handle *handle,
goto out;
if (unlikely(rb->paused)) {
- if (rb->nr_pages)
+ if (rb->nr_pages) {
local_inc(&rb->lost);
+ atomic64_inc(&event->lost_samples);
+ }
goto out;
}
@@ -254,6 +256,7 @@ __perf_output_begin(struct perf_output_handle *handle,
fail:
local_inc(&rb->lost);
+ atomic64_inc(&event->lost_samples);
perf_output_put_handle(handle);
out:
rcu_read_unlock();
--
2.33.0.309.g3052b89438-goog
On Wed, Sep 08, 2021 at 10:24:20AM -0700, Namhyung Kim wrote:
> Sometimes we want to know an accurate number of samples even if it's
> lost. Currently PERF_RECORD_LOST is generated for a ring-buffer which
> might be shared with other events. So it's hard to know per-event
> lost count.
>
> Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from
> userspace.
hi,
looks good.. will there be some tools/perf change using this?
thanks,
jirka
>
> Original-patch-by: Jiri Olsa <[email protected]>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> include/linux/perf_event.h | 2 ++
> include/uapi/linux/perf_event.h | 5 ++++-
> kernel/events/core.c | 22 +++++++++++++++++++---
> kernel/events/ring_buffer.c | 5 ++++-
> 4 files changed, 29 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index f5a6a2f069ed..189a471fba42 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -756,6 +756,8 @@ struct perf_event {
> struct pid_namespace *ns;
> u64 id;
>
> + atomic64_t lost_samples;
> +
> u64 (*clock)(void);
> perf_overflow_handler_t overflow_handler;
> void *overflow_handler_context;
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index bf8143505c49..f72008949ff0 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -299,6 +299,7 @@ enum {
> * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
> * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
> * { u64 id; } && PERF_FORMAT_ID
> + * { u64 lost; } && PERF_FORMAT_LOST
> * } && !PERF_FORMAT_GROUP
> *
> * { u64 nr;
> @@ -306,6 +307,7 @@ enum {
> * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
> * { u64 value;
> * { u64 id; } && PERF_FORMAT_ID
> + * { u64 lost; } && PERF_FORMAT_LOST
> * } cntr[nr];
> * } && PERF_FORMAT_GROUP
> * };
> @@ -315,8 +317,9 @@ enum perf_event_read_format {
> PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
> PERF_FORMAT_ID = 1U << 2,
> PERF_FORMAT_GROUP = 1U << 3,
> + PERF_FORMAT_LOST = 1U << 4,
>
> - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
> + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
> };
>
> #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 0e125ae2fa92..8708325ee4a2 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1841,6 +1841,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
> if (event->attr.read_format & PERF_FORMAT_ID)
> entry += sizeof(u64);
>
> + if (event->attr.read_format & PERF_FORMAT_LOST)
> + entry += sizeof(u64);
> +
> if (event->attr.read_format & PERF_FORMAT_GROUP) {
> nr += nr_siblings;
> size += sizeof(u64);
> @@ -5255,11 +5258,15 @@ static int __perf_read_group_add(struct perf_event *leader,
> values[n++] += perf_event_count(leader);
> if (read_format & PERF_FORMAT_ID)
> values[n++] = primary_event_id(leader);
> + if (read_format & PERF_FORMAT_ID)
> + values[n++] = atomic64_read(&leader->lost_samples);
>
> for_each_sibling_event(sub, leader) {
> values[n++] += perf_event_count(sub);
> if (read_format & PERF_FORMAT_ID)
> values[n++] = primary_event_id(sub);
> + if (read_format & PERF_FORMAT_ID)
> + values[n++] = atomic64_read(&sub->lost_samples);
> }
>
> raw_spin_unlock_irqrestore(&ctx->lock, flags);
> @@ -5316,7 +5323,7 @@ static int perf_read_one(struct perf_event *event,
> u64 read_format, char __user *buf)
> {
> u64 enabled, running;
> - u64 values[4];
> + u64 values[5];
> int n = 0;
>
> values[n++] = __perf_event_read_value(event, &enabled, &running);
> @@ -5326,6 +5333,8 @@ static int perf_read_one(struct perf_event *event,
> values[n++] = running;
> if (read_format & PERF_FORMAT_ID)
> values[n++] = primary_event_id(event);
> + if (read_format & PERF_FORMAT_LOST)
> + values[n++] = atomic64_read(&event->lost_samples);
>
> if (copy_to_user(buf, values, n * sizeof(u64)))
> return -EFAULT;
> @@ -5664,6 +5673,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>
> return perf_event_modify_attr(event, &new_attr);
> }
> +
> default:
> return -ENOTTY;
> }
> @@ -6835,7 +6845,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
> u64 enabled, u64 running)
> {
> u64 read_format = event->attr.read_format;
> - u64 values[4];
> + u64 values[5];
> int n = 0;
>
> values[n++] = perf_event_count(event);
> @@ -6849,6 +6859,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
> }
> if (read_format & PERF_FORMAT_ID)
> values[n++] = primary_event_id(event);
> + if (read_format & PERF_FORMAT_LOST)
> + values[n++] = atomic64_read(&event->lost_samples);
>
> __output_copy(handle, values, n * sizeof(u64));
> }
> @@ -6859,7 +6871,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> {
> struct perf_event *leader = event->group_leader, *sub;
> u64 read_format = event->attr.read_format;
> - u64 values[5];
> + u64 values[6];
> int n = 0;
>
> values[n++] = 1 + leader->nr_siblings;
> @@ -6877,6 +6889,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> values[n++] = perf_event_count(leader);
> if (read_format & PERF_FORMAT_ID)
> values[n++] = primary_event_id(leader);
> + if (read_format & PERF_FORMAT_LOST)
> + values[n++] = atomic64_read(&leader->lost_samples);
>
> __output_copy(handle, values, n * sizeof(u64));
>
> @@ -6890,6 +6904,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> values[n++] = perf_event_count(sub);
> if (read_format & PERF_FORMAT_ID)
> values[n++] = primary_event_id(sub);
> + if (read_format & PERF_FORMAT_LOST)
> + values[n++] = atomic64_read(&sub->lost_samples);
>
> __output_copy(handle, values, n * sizeof(u64));
> }
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 52868716ec35..727ca8f4caad 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -172,8 +172,10 @@ __perf_output_begin(struct perf_output_handle *handle,
> goto out;
>
> if (unlikely(rb->paused)) {
> - if (rb->nr_pages)
> + if (rb->nr_pages) {
> local_inc(&rb->lost);
> + atomic64_inc(&event->lost_samples);
> + }
> goto out;
> }
>
> @@ -254,6 +256,7 @@ __perf_output_begin(struct perf_output_handle *handle,
>
> fail:
> local_inc(&rb->lost);
> + atomic64_inc(&event->lost_samples);
> perf_output_put_handle(handle);
> out:
> rcu_read_unlock();
> --
> 2.33.0.309.g3052b89438-goog
>
On Fri, Sep 10, 2021 at 2:42 PM Jiri Olsa <[email protected]> wrote:
>
> On Wed, Sep 08, 2021 at 10:24:20AM -0700, Namhyung Kim wrote:
> > Sometimes we want to know an accurate number of samples even if it's
> > lost. Currently PERF_RECORD_LOST is generated for a ring-buffer which
> > might be shared with other events. So it's hard to know per-event
> > lost count.
> >
> > Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from
> > userspace.
>
> hi,
> looks good.. will there be some tools/perf change using this?
Sure, I'll work on the userspace part after this is merged.
I'm thinking about adding LOST_SAMPLES records at the end
of the data section as if they came from the kernel.
Thanks,
Namhyung
Hi Peter,
On Fri, Sep 10, 2021 at 3:40 PM Namhyung Kim <[email protected]> wrote:
>
> On Fri, Sep 10, 2021 at 2:42 PM Jiri Olsa <[email protected]> wrote:
> >
> > On Wed, Sep 08, 2021 at 10:24:20AM -0700, Namhyung Kim wrote:
> > > Sometimes we want to know an accurate number of samples even if it's
> > > lost. Currently PERF_RECORD_LOST is generated for a ring-buffer which
> > > might be shared with other events. So it's hard to know per-event
> > > lost count.
> > >
> > > Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from
> > > userspace.
> >
> > hi,
> > looks good.. will there be some tools/perf change using this?
>
> Sure, I'll work on the userspace part after this is merged.
> I'm thinking about adding LOST_SAMPLES records at the end
> of the data section as if they came from the kernel.
Can I get your feedback?
Thanks,
Namhyung
Ping!
On Wed, Sep 22, 2021 at 12:34 PM Namhyung Kim <[email protected]> wrote:
>
> Hi Peter,
>
> On Fri, Sep 10, 2021 at 3:40 PM Namhyung Kim <[email protected]> wrote:
> >
> > On Fri, Sep 10, 2021 at 2:42 PM Jiri Olsa <[email protected]> wrote:
> > >
> > > On Wed, Sep 08, 2021 at 10:24:20AM -0700, Namhyung Kim wrote:
> > > > Sometimes we want to know an accurate number of samples even if it's
> > > > lost. Currently PERF_RECORD_LOST is generated for a ring-buffer which
> > > > might be shared with other events. So it's hard to know per-event
> > > > lost count.
> > > >
> > > > Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from
> > > > userspace.
> > >
> > > hi,
> > > looks good.. will there be some tools/perf change using this?
> >
> > Sure, I'll work on the userspace part after this is merged.
> > I'm thinking about adding LOST_SAMPLES records at the end
> > of the data section as if they came from the kernel.
>
> Can I get your feedback?
>
> Thanks,
> Namhyung