2011-05-11 15:49:51

by Borislav Petkov

[permalink] [raw]
Subject: [RFC PATCH] perf: Carve out cgroup-related code

Hi guys,

here's a first prototype carving out the cgroup perf code. It builds fine
with CONFIG_CGROUP_PERF both enabled and disabled. Please take a look
while I do the same with the callchain code, and let me know whether I
should do anything differently.

Thanks.
---

Move cgroup perf support into a separate compilation unit -
kernel/events/cgroup.c - thus slimming kernel/events/core.c some more.

While at it,

* push some oneliners into perf_event.h now that they're used in
multiple .c files.

* drop the is_cgroup_event() check before the perf_cgroup_defer_enabled()
call in __perf_event_enable() since perf_cgroup_defer_enabled() performs
the same check internally anyway.

No functional change.

Signed-off-by: Borislav Petkov <[email protected]>
---
include/linux/perf_event.h | 138 ++++++++++++-
kernel/events/Makefile | 1 +
kernel/events/cgroup.c | 324 +++++++++++++++++++++++++++++
kernel/events/core.c | 496 +-------------------------------------------
4 files changed, 473 insertions(+), 486 deletions(-)
create mode 100644 kernel/events/cgroup.c

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3412684..ef65f34 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -954,8 +954,15 @@ struct perf_output_handle {
int sample;
};

+enum event_type_t {
+ EVENT_FLEXIBLE = 0x1,
+ EVENT_PINNED = 0x2,
+ EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
#ifdef CONFIG_PERF_EVENTS

+extern struct list_head pmus;
extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);

@@ -1153,6 +1160,47 @@ extern void perf_swevent_put_recursion_context(int rctx);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern void perf_event_task_tick(void);
+
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+ return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static inline u64 perf_clock(void)
+{
+ return local_clock();
+}
+
+extern void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
+/*
+ * Called with IRQs disabled
+ */
+static inline void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
+}
+
+extern void ctx_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type,
+ struct task_struct *task);
+
+static inline void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type,
+ struct task_struct *task)
+{
+ struct perf_event_context *ctx = &cpuctx->ctx;
+
+ ctx_sched_in(ctx, cpuctx, event_type, task);
+}
+
+extern int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info);
+extern u64 perf_event_time(struct perf_event *event);
#else
static inline void
perf_event_task_sched_in(struct task_struct *task) { }
@@ -1187,7 +1235,95 @@ static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline void perf_event_enable(struct perf_event *event) { }
static inline void perf_event_disable(struct perf_event *event) { }
static inline void perf_event_task_tick(void) { }
-#endif
+static inline void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type) { }
+
+static void ctx_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type,
+ struct task_struct *task) { }
+static inline int
+task_function_call(struct task_struct *p,
+ int (*func) (void *info), void *info) { return -EINVAL; }
+static inline u64 perf_event_time(struct perf_event *event) { return 0; }
+#endif /* CONFIG_PERF_EVENTS */
+
+#ifdef CONFIG_CGROUP_PERF
+extern struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task);
+extern bool perf_cgroup_match(struct perf_event *event);
+extern int
+perf_cgroup_connect(pid_t pid, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader);
+extern void perf_detach_cgroup(struct perf_event *event);
+
+static inline int is_cgroup_event(struct perf_event *event)
+{
+ return event->cgrp != NULL;
+}
+
+extern u64 perf_cgroup_event_time(struct perf_event *event);
+extern void update_cgrp_time_from_event(struct perf_event *event);
+extern void
+update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx);
+extern void
+perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx);
+
+extern void perf_cgroup_sched_out(struct task_struct *task);
+extern void perf_cgroup_sched_in(struct task_struct *task);
+
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+{
+ struct perf_cgroup_info *t;
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ event->shadow_ctx_time = now - t->timestamp;
+}
+
+static inline void
+perf_cgroup_defer_enabled(struct perf_event *event)
+{
+ /*
+ * when the current task's perf cgroup does not match
+ * the event's, we need to remember to call the
+ * perf_mark_enable() function the first time a task with
+ * a matching perf cgroup is scheduled in.
+ */
+ if (is_cgroup_event(event) && !perf_cgroup_match(event))
+ event->cgrp_defer_enabled = 1;
+}
+extern inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx);
+#else
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task) { return NULL; }
+static inline bool perf_cgroup_match(struct perf_event *event) { return true; }
+static inline int
+perf_cgroup_connect(pid_t pid, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader) { return -EINVAL; }
+static inline void perf_detach_cgroup(struct perf_event *event) { }
+static inline int is_cgroup_event(struct perf_event *event) { return 0; }
+static inline u64 perf_cgroup_event_time(struct perf_event *event) { return 0; }
+static inline void update_cgrp_time_from_event(struct perf_event *e) { }
+static inline void
+update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { }
+static inline void
+perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx) { }
+static inline void perf_cgroup_sched_out(struct task_struct *task) { }
+static inline void perf_cgroup_sched_in(struct task_struct *task) { }
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) { }
+static inline void perf_cgroup_defer_enabled(struct perf_event *event) { }
+static inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx) { }
+#endif /* CONFIG_CGROUP_PERF */

#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))

diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 1ce23d3..21b7da7 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -4,3 +4,4 @@ endif

obj-y := core.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_CGROUP_PERF) += cgroup.o
diff --git a/kernel/events/cgroup.c b/kernel/events/cgroup.c
new file mode 100644
index 0000000..5516928
--- /dev/null
+++ b/kernel/events/cgroup.c
@@ -0,0 +1,324 @@
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ */
+inline struct perf_cgroup *perf_cgroup_from_task(struct task_struct *task)
+{
+ return container_of(task_subsys_state(task, perf_subsys_id),
+ struct perf_cgroup, css);
+}
+
+inline bool perf_cgroup_match(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+
+ return !event->cgrp || event->cgrp == cpuctx->cgrp;
+}
+
+static inline void perf_get_cgroup(struct perf_event *event)
+{
+ css_get(&event->cgrp->css);
+}
+
+inline int perf_cgroup_connect(int fd, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader)
+{
+ struct perf_cgroup *cgrp;
+ struct cgroup_subsys_state *css;
+ struct file *file;
+ int ret = 0, fput_needed;
+
+ file = fget_light(fd, &fput_needed);
+ if (!file)
+ return -EBADF;
+
+ css = cgroup_css_from_dir(file, perf_subsys_id);
+ if (IS_ERR(css)) {
+ ret = PTR_ERR(css);
+ goto out;
+ }
+
+ cgrp = container_of(css, struct perf_cgroup, css);
+ event->cgrp = cgrp;
+
+ /* must be done before we fput() the file */
+ perf_get_cgroup(event);
+
+ /*
+ * all events in a group must monitor
+ * the same cgroup because a task belongs
+ * to only one perf cgroup at a time
+ */
+ if (group_leader && group_leader->cgrp != cgrp) {
+ perf_detach_cgroup(event);
+ ret = -EINVAL;
+ }
+out:
+ fput_light(file, fput_needed);
+ return ret;
+}
+
+static inline void perf_put_cgroup(struct perf_event *event)
+{
+ css_put(&event->cgrp->css);
+}
+
+inline void perf_detach_cgroup(struct perf_event *event)
+{
+ perf_put_cgroup(event);
+ event->cgrp = NULL;
+}
+
+inline u64 perf_cgroup_event_time(struct perf_event *event)
+{
+ struct perf_cgroup_info *t;
+
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ return t->time;
+}
+
+inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ struct perf_event *sub;
+ u64 tstamp = perf_event_time(event);
+
+ if (!event->cgrp_defer_enabled)
+ return;
+
+ event->cgrp_defer_enabled = 0;
+
+ event->tstamp_enabled = tstamp - event->total_time_enabled;
+ list_for_each_entry(sub, &event->sibling_list, group_entry) {
+ if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
+ sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+ sub->cgrp_defer_enabled = 0;
+ }
+ }
+}
+
+static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
+{
+ struct perf_cgroup_info *info;
+ u64 now;
+
+ now = perf_clock();
+
+ info = this_cpu_ptr(cgrp->info);
+
+ info->time += now - info->timestamp;
+ info->timestamp = now;
+}
+
+inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+{
+ struct perf_cgroup *cgrp_out = cpuctx->cgrp;
+ if (cgrp_out)
+ __update_cgrp_time(cgrp_out);
+}
+
+inline void update_cgrp_time_from_event(struct perf_event *event)
+{
+ struct perf_cgroup *cgrp;
+
+ /*
+ * ensure we access cgroup data only when needed and
+ * when we know the cgroup is pinned (css_get)
+ */
+ if (!is_cgroup_event(event))
+ return;
+
+ cgrp = perf_cgroup_from_task(current);
+ /*
+ * Do not update time when cgroup is not active
+ */
+ if (cgrp == event->cgrp)
+ __update_cgrp_time(event->cgrp);
+}
+
+inline void perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx)
+{
+ struct perf_cgroup *cgrp;
+ struct perf_cgroup_info *info;
+
+ /*
+ * ctx->lock held by caller
+ * ensure we do not access cgroup data
+ * unless we have the cgroup pinned (css_get)
+ */
+ if (!task || !ctx->nr_cgroups)
+ return;
+
+ cgrp = perf_cgroup_from_task(task);
+ info = this_cpu_ptr(cgrp->info);
+ info->timestamp = ctx->timestamp;
+}
+
+#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
+#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */
+
+/*
+ * reschedule events based on the cgroup constraint of task.
+ *
+ * mode SWOUT : schedule out everything
+ * mode SWIN : schedule in based on cgroup for next
+ */
+void perf_cgroup_switch(struct task_struct *task, int mode)
+{
+ struct perf_cpu_context *cpuctx;
+ struct pmu *pmu;
+ unsigned long flags;
+
+ /*
+ * disable interrupts to avoid getting nr_cgroup
+ * changes via __perf_event_disable(). Also
+ * avoids preemption.
+ */
+ local_irq_save(flags);
+
+ /*
+ * we reschedule only in the presence of cgroup
+ * constrained events.
+ */
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ /*
+ * perf_cgroup_events says at least one
+ * context on this CPU has cgroup events.
+ *
+ * ctx->nr_cgroups reports the number of cgroup
+ * events for a context.
+ */
+ if (cpuctx->ctx.nr_cgroups > 0) {
+
+ if (mode & PERF_CGROUP_SWOUT) {
+ cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+ /*
+ * must not be done before ctxswout due
+ * to event_filter_match() in event_sched_out()
+ */
+ cpuctx->cgrp = NULL;
+ }
+
+ if (mode & PERF_CGROUP_SWIN) {
+ WARN_ON_ONCE(cpuctx->cgrp);
+ /* set cgrp before ctxsw in to
+ * allow event_filter_match() to not
+ * have to pass task around
+ */
+ cpuctx->cgrp = perf_cgroup_from_task(task);
+ cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
+ }
+ }
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ }
+
+ rcu_read_unlock();
+
+ local_irq_restore(flags);
+}
+
+inline void perf_cgroup_sched_out(struct task_struct *task)
+{
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+}
+
+inline void perf_cgroup_sched_in(struct task_struct *task)
+{
+ perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+}
+
+static int __perf_cgroup_move(void *info)
+{
+ struct task_struct *task = info;
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+ return 0;
+}
+
+static void perf_cgroup_move(struct task_struct *task)
+{
+ task_function_call(task, __perf_cgroup_move, task);
+}
+
+static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup *old_cgrp, struct task_struct *task,
+ bool threadgroup)
+{
+ perf_cgroup_move(task);
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+ perf_cgroup_move(c);
+ }
+ rcu_read_unlock();
+ }
+}
+
+static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup *old_cgrp, struct task_struct *task)
+{
+ /*
+ * cgroup_exit() is called in the copy_process() failure path.
+ * Ignore this case since the task hasn't run yet, this avoids
+ * trying to poke a half freed task state from generic code.
+ */
+ if (!(task->flags & PF_EXITING))
+ return;
+
+ perf_cgroup_move(task);
+}
+
+static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup_subsys *ss,
+ struct cgroup *cont)
+{
+ struct perf_cgroup *jc;
+
+ jc = kzalloc(sizeof(*jc), GFP_KERNEL);
+ if (!jc)
+ return ERR_PTR(-ENOMEM);
+
+ jc->info = alloc_percpu(struct perf_cgroup_info);
+ if (!jc->info) {
+ kfree(jc);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return &jc->css;
+}
+
+static void perf_cgroup_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cont)
+{
+ struct perf_cgroup *jc;
+ jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
+ struct perf_cgroup, css);
+ free_percpu(jc->info);
+ kfree(jc);
+}
+
+struct cgroup_subsys perf_subsys = {
+ .name = "perf_event",
+ .subsys_id = perf_subsys_id,
+ .create = perf_cgroup_create,
+ .destroy = perf_cgroup_destroy,
+ .exit = perf_cgroup_exit,
+ .attach = perf_cgroup_attach,
+};
+
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0fc34a3..b65905f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -72,7 +72,7 @@ static void remote_function(void *data)
* -ESRCH - when the process isn't running
* -EAGAIN - when the process moved away
*/
-static int
+int
task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
{
struct remote_function_call data = {
@@ -115,12 +115,6 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
PERF_FLAG_FD_OUTPUT |\
PERF_FLAG_PID_CGROUP)

-enum event_type_t {
- EVENT_FLEXIBLE = 0x1,
- EVENT_PINNED = 0x2,
- EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
-};
-
/*
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
@@ -132,7 +126,7 @@ static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_task_events __read_mostly;

-static LIST_HEAD(pmus);
+LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;

@@ -172,15 +166,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,

static atomic64_t perf_event_id;

-static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type);
-
-static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type,
- struct task_struct *task);
-
static void update_context_time(struct perf_event_context *ctx);
-static u64 perf_event_time(struct perf_event *event);

void __weak perf_event_print_debug(void) { }

@@ -189,366 +175,6 @@ extern __weak const char *perf_pmu_name(void)
return "pmu";
}

-static inline u64 perf_clock(void)
-{
- return local_clock();
-}
-
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
- return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-#ifdef CONFIG_CGROUP_PERF
-
-/*
- * Must ensure cgroup is pinned (css_get) before calling
- * this function. In other words, we cannot call this function
- * if there is no cgroup event for the current CPU context.
- */
-static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
-{
- return container_of(task_subsys_state(task, perf_subsys_id),
- struct perf_cgroup, css);
-}
-
-static inline bool
-perf_cgroup_match(struct perf_event *event)
-{
- struct perf_event_context *ctx = event->ctx;
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
- return !event->cgrp || event->cgrp == cpuctx->cgrp;
-}
-
-static inline void perf_get_cgroup(struct perf_event *event)
-{
- css_get(&event->cgrp->css);
-}
-
-static inline void perf_put_cgroup(struct perf_event *event)
-{
- css_put(&event->cgrp->css);
-}
-
-static inline void perf_detach_cgroup(struct perf_event *event)
-{
- perf_put_cgroup(event);
- event->cgrp = NULL;
-}
-
-static inline int is_cgroup_event(struct perf_event *event)
-{
- return event->cgrp != NULL;
-}
-
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
-{
- struct perf_cgroup_info *t;
-
- t = per_cpu_ptr(event->cgrp->info, event->cpu);
- return t->time;
-}
-
-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
-{
- struct perf_cgroup_info *info;
- u64 now;
-
- now = perf_clock();
-
- info = this_cpu_ptr(cgrp->info);
-
- info->time += now - info->timestamp;
- info->timestamp = now;
-}
-
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
-{
- struct perf_cgroup *cgrp_out = cpuctx->cgrp;
- if (cgrp_out)
- __update_cgrp_time(cgrp_out);
-}
-
-static inline void update_cgrp_time_from_event(struct perf_event *event)
-{
- struct perf_cgroup *cgrp;
-
- /*
- * ensure we access cgroup data only when needed and
- * when we know the cgroup is pinned (css_get)
- */
- if (!is_cgroup_event(event))
- return;
-
- cgrp = perf_cgroup_from_task(current);
- /*
- * Do not update time when cgroup is not active
- */
- if (cgrp == event->cgrp)
- __update_cgrp_time(event->cgrp);
-}
-
-static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
- struct perf_event_context *ctx)
-{
- struct perf_cgroup *cgrp;
- struct perf_cgroup_info *info;
-
- /*
- * ctx->lock held by caller
- * ensure we do not access cgroup data
- * unless we have the cgroup pinned (css_get)
- */
- if (!task || !ctx->nr_cgroups)
- return;
-
- cgrp = perf_cgroup_from_task(task);
- info = this_cpu_ptr(cgrp->info);
- info->timestamp = ctx->timestamp;
-}
-
-#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
-#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */
-
-/*
- * reschedule events based on the cgroup constraint of task.
- *
- * mode SWOUT : schedule out everything
- * mode SWIN : schedule in based on cgroup for next
- */
-void perf_cgroup_switch(struct task_struct *task, int mode)
-{
- struct perf_cpu_context *cpuctx;
- struct pmu *pmu;
- unsigned long flags;
-
- /*
- * disable interrupts to avoid geting nr_cgroup
- * changes via __perf_event_disable(). Also
- * avoids preemption.
- */
- local_irq_save(flags);
-
- /*
- * we reschedule only in the presence of cgroup
- * constrained events.
- */
- rcu_read_lock();
-
- list_for_each_entry_rcu(pmu, &pmus, entry) {
-
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
- perf_pmu_disable(cpuctx->ctx.pmu);
-
- /*
- * perf_cgroup_events says at least one
- * context on this CPU has cgroup events.
- *
- * ctx->nr_cgroups reports the number of cgroup
- * events for a context.
- */
- if (cpuctx->ctx.nr_cgroups > 0) {
-
- if (mode & PERF_CGROUP_SWOUT) {
- cpu_ctx_sched_out(cpuctx, EVENT_ALL);
- /*
- * must not be done before ctxswout due
- * to event_filter_match() in event_sched_out()
- */
- cpuctx->cgrp = NULL;
- }
-
- if (mode & PERF_CGROUP_SWIN) {
- WARN_ON_ONCE(cpuctx->cgrp);
- /* set cgrp before ctxsw in to
- * allow event_filter_match() to not
- * have to pass task around
- */
- cpuctx->cgrp = perf_cgroup_from_task(task);
- cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
- }
- }
-
- perf_pmu_enable(cpuctx->ctx.pmu);
- }
-
- rcu_read_unlock();
-
- local_irq_restore(flags);
-}
-
-static inline void perf_cgroup_sched_out(struct task_struct *task)
-{
- perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *task)
-{
- perf_cgroup_switch(task, PERF_CGROUP_SWIN);
-}
-
-static inline int perf_cgroup_connect(int fd, struct perf_event *event,
- struct perf_event_attr *attr,
- struct perf_event *group_leader)
-{
- struct perf_cgroup *cgrp;
- struct cgroup_subsys_state *css;
- struct file *file;
- int ret = 0, fput_needed;
-
- file = fget_light(fd, &fput_needed);
- if (!file)
- return -EBADF;
-
- css = cgroup_css_from_dir(file, perf_subsys_id);
- if (IS_ERR(css)) {
- ret = PTR_ERR(css);
- goto out;
- }
-
- cgrp = container_of(css, struct perf_cgroup, css);
- event->cgrp = cgrp;
-
- /* must be done before we fput() the file */
- perf_get_cgroup(event);
-
- /*
- * all events in a group must monitor
- * the same cgroup because a task belongs
- * to only one perf cgroup at a time
- */
- if (group_leader && group_leader->cgrp != cgrp) {
- perf_detach_cgroup(event);
- ret = -EINVAL;
- }
-out:
- fput_light(file, fput_needed);
- return ret;
-}
-
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
- struct perf_cgroup_info *t;
- t = per_cpu_ptr(event->cgrp->info, event->cpu);
- event->shadow_ctx_time = now - t->timestamp;
-}
-
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
- /*
- * when the current task's perf cgroup does not match
- * the event's, we need to remember to call the
- * perf_mark_enable() function the first time a task with
- * a matching perf cgroup is scheduled in.
- */
- if (is_cgroup_event(event) && !perf_cgroup_match(event))
- event->cgrp_defer_enabled = 1;
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- struct perf_event *sub;
- u64 tstamp = perf_event_time(event);
-
- if (!event->cgrp_defer_enabled)
- return;
-
- event->cgrp_defer_enabled = 0;
-
- event->tstamp_enabled = tstamp - event->total_time_enabled;
- list_for_each_entry(sub, &event->sibling_list, group_entry) {
- if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
- sub->tstamp_enabled = tstamp - sub->total_time_enabled;
- sub->cgrp_defer_enabled = 0;
- }
- }
-}
-#else /* !CONFIG_CGROUP_PERF */
-
-static inline bool
-perf_cgroup_match(struct perf_event *event)
-{
- return true;
-}
-
-static inline void perf_detach_cgroup(struct perf_event *event)
-{}
-
-static inline int is_cgroup_event(struct perf_event *event)
-{
- return 0;
-}
-
-static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
-{
- return 0;
-}
-
-static inline void update_cgrp_time_from_event(struct perf_event *event)
-{
-}
-
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
-{
-}
-
-static inline void perf_cgroup_sched_out(struct task_struct *task)
-{
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *task)
-{
-}
-
-static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
- struct perf_event_attr *attr,
- struct perf_event *group_leader)
-{
- return -EINVAL;
-}
-
-static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
- struct perf_event_context *ctx)
-{
-}
-
-void
-perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
-{
-}
-
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
-}
-
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
-{
- return 0;
-}
-
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
-{
-}
-#endif
-
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -727,7 +353,7 @@ static void update_context_time(struct perf_event_context *ctx)
ctx->timestamp = now;
}

-static u64 perf_event_time(struct perf_event *event)
+u64 perf_event_time(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;

@@ -1641,8 +1267,7 @@ static int __perf_event_enable(void *info)
__perf_event_mark_enabled(event, ctx);

if (!event_filter_match(event)) {
- if (is_cgroup_event(event))
- perf_cgroup_defer_enabled(event);
+ perf_cgroup_defer_enabled(event);
goto unlock;
}

@@ -1761,9 +1386,9 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
return 0;
}

-static void ctx_sched_out(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx,
- enum event_type_t event_type)
+void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
{
struct perf_event *event;

@@ -1988,15 +1613,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
cpuctx->task_ctx = NULL;
}

-/*
- * Called with IRQs disabled
- */
-static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type)
-{
- ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
-}
-
static void
ctx_pinned_sched_in(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx)
@@ -2056,11 +1672,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
}
}

-static void
-ctx_sched_in(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx,
- enum event_type_t event_type,
- struct task_struct *task)
+void ctx_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type,
+ struct task_struct *task)
{
u64 now;

@@ -2087,15 +1702,6 @@ out:
raw_spin_unlock(&ctx->lock);
}

-static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type,
- struct task_struct *task)
-{
- struct perf_event_context *ctx = &cpuctx->ctx;
-
- ctx_sched_in(ctx, cpuctx, event_type, task);
-}
-
static void task_ctx_sched_in(struct perf_event_context *ctx,
enum event_type_t event_type)
{
@@ -7373,83 +6979,3 @@ unlock:
return ret;
}
device_initcall(perf_event_sysfs_init);
-
-#ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_create(
- struct cgroup_subsys *ss, struct cgroup *cont)
-{
- struct perf_cgroup *jc;
-
- jc = kzalloc(sizeof(*jc), GFP_KERNEL);
- if (!jc)
- return ERR_PTR(-ENOMEM);
-
- jc->info = alloc_percpu(struct perf_cgroup_info);
- if (!jc->info) {
- kfree(jc);
- return ERR_PTR(-ENOMEM);
- }
-
- return &jc->css;
-}
-
-static void perf_cgroup_destroy(struct cgroup_subsys *ss,
- struct cgroup *cont)
-{
- struct perf_cgroup *jc;
- jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
- struct perf_cgroup, css);
- free_percpu(jc->info);
- kfree(jc);
-}
-
-static int __perf_cgroup_move(void *info)
-{
- struct task_struct *task = info;
- perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
- return 0;
-}
-
-static void perf_cgroup_move(struct task_struct *task)
-{
- task_function_call(task, __perf_cgroup_move, task);
-}
-
-static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task,
- bool threadgroup)
-{
- perf_cgroup_move(task);
- if (threadgroup) {
- struct task_struct *c;
- rcu_read_lock();
- list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
- perf_cgroup_move(c);
- }
- rcu_read_unlock();
- }
-}
-
-static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task)
-{
- /*
- * cgroup_exit() is called in the copy_process() failure path.
- * Ignore this case since the task hasn't ran yet, this avoids
- * trying to poke a half freed task state from generic code.
- */
- if (!(task->flags & PF_EXITING))
- return;
-
- perf_cgroup_move(task);
-}
-
-struct cgroup_subsys perf_subsys = {
- .name = "perf_event",
- .subsys_id = perf_subsys_id,
- .create = perf_cgroup_create,
- .destroy = perf_cgroup_destroy,
- .exit = perf_cgroup_exit,
- .attach = perf_cgroup_attach,
-};
-#endif /* CONFIG_CGROUP_PERF */
--
1.7.4.rc2


--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632


2011-05-11 16:18:09

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Wed, 2011-05-11 at 14:11 +0200, Borislav Petkov wrote:
> include/linux/perf_event.h | 138 ++++++++++++-

I don't like exposing all that in a kernel wide header.. should we maybe
have kernel/events/internal.h?

2011-05-11 17:30:18

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code


* Peter Zijlstra <[email protected]> wrote:

> On Wed, 2011-05-11 at 14:11 +0200, Borislav Petkov wrote:
> > include/linux/perf_event.h | 138 ++++++++++++-
>
> I don't like exposing all that in a kernel wide header.. should we maybe
> have kernel/events/internal.h?

Yeah.

We could eventually avoid exposing much of struct perf_event as well: arch PMU
drivers need the struct perf_event_attr, struct hw_perf_event and not much
else.

Thanks,

Ingo

2011-05-11 16:29:50

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Wed, May 11, 2011 at 09:46:40AM -0400, Peter Zijlstra wrote:
> On Wed, 2011-05-11 at 14:11 +0200, Borislav Petkov wrote:
> > include/linux/perf_event.h | 138 ++++++++++++-
>
> I don't like exposing all that in a kernel wide header.. should we maybe
> have kernel/events/internal.h?

Sounds prudent. Maybe add a patch ontop that moves all perf_event.h
exports which are shared _only_ between kernel/events/*.c compilation
units to an internal.h header? Ingo?

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-11 17:04:53

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code


* Borislav Petkov <[email protected]> wrote:

> On Wed, May 11, 2011 at 09:46:40AM -0400, Peter Zijlstra wrote:
> > On Wed, 2011-05-11 at 14:11 +0200, Borislav Petkov wrote:
> > > include/linux/perf_event.h | 138 ++++++++++++-
> >
> > I don't like exposing all that in a kernel wide header.. should we maybe
> > have kernel/events/internal.h?
>
> Sounds prudent. Maybe add a patch ontop that moves all perf_event.h
> exports which are shared _only_ between kernel/events/*.c compilation
> units to an internal.h header? Ingo?

There's not that many that are in perf_event.h and are only used in
kernel/event/core.c.

Below is a generated list of all exported functions and their usage sites. Out
of 40 functions 7 should not be exported:

__perf_sw_event()
perf_swevent_put_recursion_context()
perf_pmu_unregister()
perf_output_copy()
__perf_event_task_sched_in()
__perf_event_task_sched_out()
perf_event_read_value()

Thanks,

Ingo


perf_bp_event():
arch/arm/kernel/hw_breakpoint.c
arch/powerpc/kernel/hw_breakpoint.c
arch/sh/kernel/hw_breakpoint.c
arch/x86/kernel/hw_breakpoint.c
include/linux/perf_event.h
kernel/events/core.c

perf_callchain_kernel():
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event.c
arch/powerpc/kernel/perf_callchain.c
arch/sh/kernel/perf_callchain.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c

perf_callchain_user():
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event.c
arch/powerpc/kernel/perf_callchain.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c

perf_event_comm():
fs/exec.c
include/linux/perf_event.h
kernel/events/core.c

perf_event_delayed_put():
include/linux/perf_event.h
kernel/events/core.c
kernel/exit.c

perf_event_disable():
arch/powerpc/kernel/hw_breakpoint.c
include/linux/perf_event.h
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/watchdog.c

perf_event_enable():
include/linux/perf_event.h
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/watchdog.c

perf_event_exit_task():
fs/exec.c
include/linux/perf_event.h
kernel/events/core.c
kernel/exit.c

perf_event_fork():
include/linux/perf_event.h
kernel/events/core.c
kernel/fork.c

perf_event_free_task():
include/linux/perf_event.h
kernel/events/core.c
kernel/fork.c

perf_event_init():
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event.c
arch/mips/kernel/perf_event_mipsxx.c
arch/sh/kernel/perf_event.c
include/linux/perf_event.h
init/main.c
kernel/events/core.c
kernel/fork.c
tools/perf/design.txt

perf_event_init_task():
include/linux/perf_event.h
kernel/events/core.c
kernel/fork.c

perf_event_mmap():
include/linux/perf_event.h
kernel/events/core.c
mm/mmap.c
mm/mprotect.c
tools/perf/design.txt
tools/perf/perf.h
tools/perf/util/event.c

perf_event_overflow():
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event_v6.c
arch/arm/kernel/perf_event_v7.c
arch/arm/kernel/perf_event_xscale.c
arch/mips/kernel/perf_event.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_p4.c
include/linux/perf_event.h
kernel/events/core.c

perf_event_print_debug():
arch/alpha/kernel/perf_event.c
arch/powerpc/kernel/perf_event.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
drivers/tty/sysrq.c
include/linux/perf_event.h
kernel/events/core.c

perf_event_read_value():
include/linux/perf_event.h
kernel/events/core.c

perf_event_release_kernel():
drivers/oprofile/oprofile_perf.c
include/linux/perf_event.h
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/watchdog.c

perf_event_task_disable():
include/linux/perf_event.h
kernel/events/core.c
kernel/sys.c

perf_event_task_enable():
include/linux/perf_event.h
kernel/events/core.c
kernel/sys.c

__perf_event_task_sched_in():
include/linux/perf_event.h
kernel/events/core.c

__perf_event_task_sched_out():
include/linux/perf_event.h
kernel/events/core.c

perf_event_task_tick():
include/linux/perf_event.h
kernel/events/core.c
kernel/sched.c

perf_event_update_userpage():
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/sh/kernel/perf_event.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c

perf_num_counters():
arch/arm/kernel/perf_event.c
arch/sh/kernel/perf_event.c
arch/sh/oprofile/common.c
drivers/oprofile/oprofile_perf.c
include/linux/perf_event.h

perf_output_begin():
arch/x86/kernel/cpu/perf_event_intel_ds.c
include/linux/perf_event.h
kernel/events/core.c

perf_output_copy():
include/linux/perf_event.h
kernel/events/core.c

perf_output_end():
arch/x86/kernel/cpu/perf_event_intel_ds.c
include/linux/perf_event.h
kernel/events/core.c

perf_output_sample():
arch/x86/kernel/cpu/perf_event_intel_ds.c
include/linux/perf_event.h
kernel/events/core.c

perf_pmu_disable():
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/sh/kernel/perf_event.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c

perf_pmu_enable():
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/sh/kernel/perf_event.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c

perf_pmu_register():
arch/alpha/kernel/perf_event.c
arch/arm/kernel/perf_event.c
arch/mips/kernel/perf_event_mipsxx.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/sh/kernel/perf_event.c
arch/sparc/kernel/perf_event.c
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c
kernel/events/hw_breakpoint.c

perf_pmu_unregister():
include/linux/perf_event.h
kernel/events/core.c

perf_prepare_sample():
arch/x86/kernel/cpu/perf_event_intel_ds.c
include/linux/perf_event.h
kernel/events/core.c

perf_proc_update_handler():
include/linux/perf_event.h
kernel/events/core.c
kernel/sysctl.c

perf_register_guest_info_callbacks():
arch/x86/kvm/x86.c
include/linux/perf_event.h
kernel/events/core.c

__perf_sw_event():
include/linux/perf_event.h
kernel/events/core.c

perf_swevent_get_recursion_context():
include/linux/perf_event.h
kernel/events/core.c
kernel/trace/trace_event_perf.c

perf_swevent_put_recursion_context():
include/linux/perf_event.h
kernel/events/core.c

perf_tp_event():
include/linux/ftrace_event.h
include/linux/perf_event.h
include/trace/ftrace.h
kernel/events/core.c

perf_unregister_guest_info_callbacks():
arch/x86/kvm/x86.c
include/linux/perf_event.h
kernel/events/core.c

2011-05-11 15:49:52

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Wed, May 11, 2011 at 10:13:42AM -0400, Ingo Molnar wrote:
>
> * Borislav Petkov <[email protected]> wrote:
>
> > On Wed, May 11, 2011 at 09:46:40AM -0400, Peter Zijlstra wrote:
> > > On Wed, 2011-05-11 at 14:11 +0200, Borislav Petkov wrote:
> > > > include/linux/perf_event.h | 138 ++++++++++++-
> > >
> > > I don't like exposing all that in a kernel wide header.. should we maybe
> > > have kernel/events/internal.h?
> >
> > Sounds prudent. Maybe add a patch ontop that moves all perf_event.h
> > exports which are shared _only_ between kernel/events/*.c compilation
> > units to an internal.h header? Ingo?
>
> There's not that many that are in perf_event.h and are only used in
> kernel/event/core.c.
>
> Below is a generated list of all exported functions and their usage sites. Out
> of 40 functions 7 should not be exported:
>
> __perf_sw_event()
> perf_swevent_put_recursion_context()
> perf_pmu_unregister()
> perf_output_copy()
> __perf_event_task_sched_in()
> __perf_event_task_sched_out()
> perf_event_read_value()

Right, but splitting perf_event.c further would cause a bunch more
of them to go up in the header since they're being shared among
kernel/events/*.c files. But in the end, this is a judgement call - I
mean, even the polluting ones have a clearly defined namespace starting
their names with {_-,}perf_*. If you asked me, I'd do an internal.h
header from the get-go so that all is kept as clean as possible.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-11 16:41:59

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code


* Borislav Petkov <[email protected]> wrote:

> On Wed, May 11, 2011 at 10:13:42AM -0400, Ingo Molnar wrote:
> >
> > * Borislav Petkov <[email protected]> wrote:
> >
> > > On Wed, May 11, 2011 at 09:46:40AM -0400, Peter Zijlstra wrote:
> > > > On Wed, 2011-05-11 at 14:11 +0200, Borislav Petkov wrote:
> > > > > include/linux/perf_event.h | 138 ++++++++++++-
> > > >
> > > > I don't like exposing all that in a kernel wide header.. should we maybe
> > > > have kernel/events/internal.h?
> > >
> > > Sounds prudent. Maybe add a patch ontop that moves all perf_event.h
> > > exports which are shared _only_ between kernel/events/*.c compilation
> > > units to an internal.h header? Ingo?
> >
> > There's not that many that are in perf_event.h and are only used in
> > kernel/event/core.c.
> >
> > Below is a generated list of all exported functions and their usage sites. Out
> > of 40 functions 7 should not be exported:
> >
> > __perf_sw_event()
> > perf_swevent_put_recursion_context()
> > perf_pmu_unregister()
> > perf_output_copy()
> > __perf_event_task_sched_in()
> > __perf_event_task_sched_out()
> > perf_event_read_value()
>
> Right, but splitting perf_event.c further would cause a bunch more of them to
> go up in the header since they're being shared among kernel/events/*.c files.
> But in the end, this is a judgement call - I mean, even the polluting ones
> have a clearly defined namespace starting their names with {_-,}perf_*. If
> you asked me, I'd do an internal.h header from the get-go so that all is kept
> as clean as possible.

We seem to be in wild agreement wrt. internal.h!

Thanks,

Ingo

2011-05-11 17:09:50

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Wed, May 11, 2011 at 11:21:56AM -0400, Ingo Molnar wrote:

[..]

> > > There's not that many that are in perf_event.h and are only used in
> > > kernel/event/core.c.
> > >
> > > Below is a generated list of all exported functions and their usage sites. Out
> > > of 40 functions 7 should not be exported:
> > >
> > > __perf_sw_event()
> > > perf_swevent_put_recursion_context()
> > > perf_pmu_unregister()
> > > perf_output_copy()
> > > __perf_event_task_sched_in()
> > > __perf_event_task_sched_out()
> > > perf_event_read_value()
> >
> > Right, but splitting perf_event.c further would cause a bunch more of them to
> > go up in the header since they're being shared among kernel/events/*.c files.
> > But in the end, this is a judgement call - I mean, even the polluting ones
> > have a clearly defined namespace starting their names with {_-,}perf_*. If
> > you asked me, I'd do an internal.h header from the get-go so that all is kept
> > as clean as possible.
>
> We seem to be in wild agreement wrt. internal.h!

Done, patch below. I've moved all exports local to kernel/events/* to
internal.h. Branch at

git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git perf-split

also updated.

--
From: Borislav Petkov <[email protected]>
Date: Wed, 11 May 2011 19:02:10 +0200
Subject: [PATCH] perf: Cleanup namespace

Add an internal header containing all function declarations used only by
kernel/events/* compilation units. Drop declarations used in one .c file
only.

No functionality change.

Signed-off-by: Borislav Petkov <[email protected]>
---
include/linux/perf_event.h | 132 --------------------------------------------
kernel/events/callchain.c | 3 +
kernel/events/cgroup.c | 2 +
kernel/events/core.c | 2 +
kernel/events/internal.h | 129 +++++++++++++++++++++++++++++++++++++++++++
5 files changed, 136 insertions(+), 132 deletions(-)
create mode 100644 kernel/events/internal.h

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7978850..6b25452 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -963,7 +963,6 @@ enum event_type_t {
#ifdef CONFIG_PERF_EVENTS
extern struct list_head pmus;
extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
-extern void perf_pmu_unregister(struct pmu *pmu);

extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
@@ -985,8 +984,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
int cpu,
struct task_struct *task,
perf_overflow_handler_t callback);
-extern u64 perf_event_read_value(struct perf_event *event,
- u64 *enabled, u64 *running);

struct perf_sample_data {
u64 type;
@@ -1152,60 +1149,10 @@ extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample);
extern void perf_output_end(struct perf_output_handle *handle);
-extern void perf_output_copy(struct perf_output_handle *handle,
- const void *buf, unsigned int len);
extern int perf_swevent_get_recursion_context(void);
-extern void perf_swevent_put_recursion_context(int rctx);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern void perf_event_task_tick(void);
-
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
- return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static inline u64 perf_clock(void)
-{
- return local_clock();
-}
-
-extern void ctx_sched_out(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx,
- enum event_type_t event_type);
-/*
- * Called with IRQs disabled
- */
-static inline void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type)
-{
- ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
-}
-
-extern void ctx_sched_in(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx,
- enum event_type_t event_type,
- struct task_struct *task);
-
-static inline void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type,
- struct task_struct *task)
-{
- struct perf_event_context *ctx = &cpuctx->ctx;
-
- ctx_sched_in(ctx, cpuctx, event_type, task);
-}
-
-extern int
-task_function_call(struct task_struct *p, int (*func) (void *info), void *info);
-extern u64 perf_event_time(struct perf_event *event);
-
-extern int get_recursion_context(int *recursion);
-extern inline void put_recursion_context(int *recursion, int rctx);
-extern void put_callchain_buffers(void);
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
-extern int get_callchain_buffers(void);
#else
static inline void
perf_event_task_sched_in(struct task_struct *task) { }
@@ -1236,90 +1183,11 @@ static inline void perf_event_comm(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
-static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline void perf_event_enable(struct perf_event *event) { }
static inline void perf_event_disable(struct perf_event *event) { }
static inline void perf_event_task_tick(void) { }
#endif /* CONFIG_PERF_EVENTS */

-#ifdef CONFIG_CGROUP_PERF
-extern struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task);
-extern bool perf_cgroup_match(struct perf_event *event);
-extern int
-perf_cgroup_connect(pid_t pid, struct perf_event *event,
- struct perf_event_attr *attr,
- struct perf_event *group_leader);
-extern void perf_detach_cgroup(struct perf_event *event);
-
-static inline int is_cgroup_event(struct perf_event *event)
-{
- return event->cgrp != NULL;
-}
-
-extern u64 perf_cgroup_event_time(struct perf_event *event);
-extern void update_cgrp_time_from_event(struct perf_event *event);
-extern void
-update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx);
-extern void
-perf_cgroup_set_timestamp(struct task_struct *task,
- struct perf_event_context *ctx);
-
-extern void perf_cgroup_sched_out(struct task_struct *task);
-extern void perf_cgroup_sched_in(struct task_struct *task);
-
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
- struct perf_cgroup_info *t;
- t = per_cpu_ptr(event->cgrp->info, event->cpu);
- event->shadow_ctx_time = now - t->timestamp;
-}
-
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
- /*
- * when the current task's perf cgroup does not match
- * the event's, we need to remember to call the
- * perf_mark_enable() function the first time a task with
- * a matching perf cgroup is scheduled in.
- */
- if (is_cgroup_event(event) && !perf_cgroup_match(event))
- event->cgrp_defer_enabled = 1;
-}
-extern inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx);
-#else
-static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task) { return NULL; }
-static inline bool perf_cgroup_match(struct perf_event *event) { return true; }
-static inline int
-perf_cgroup_connect(pid_t pid, struct perf_event *event,
- struct perf_event_attr *attr,
- struct perf_event *group_leader) { return -EINVAL; }
-static inline void perf_detach_cgroup(struct perf_event *event) { }
-static inline int is_cgroup_event(struct perf_event *event) { return 0; }
-static inline u64 perf_cgroup_event_time(struct perf_event *event) { return 0; }
-static inline void update_cgrp_time_from_event(struct perf_event *e) { }
-static inline void
-update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { }
-static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
- struct perf_event_context *ctx) { }
-static inline void perf_cgroup_sched_out(struct task_struct *task) { }
-static inline void perf_cgroup_sched_in(struct task_struct *task) { }
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) { }
-static inline void perf_cgroup_defer_enabled(struct perf_event *event) { }
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx) { }
-#endif /* CONFIG_CGROUP_PERF */
-
-#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
-
/*
* This has to have a higher priority than migration_notifier in sched.c.
*/
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 0b495fc..86bb85a 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -1,6 +1,9 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
+
+#include "internal.h"
+
/*
* Callchain support
*/
diff --git a/kernel/events/cgroup.c b/kernel/events/cgroup.c
index 5516928..e50c677 100644
--- a/kernel/events/cgroup.c
+++ b/kernel/events/cgroup.c
@@ -2,6 +2,8 @@
#include <linux/slab.h>
#include <linux/perf_event.h>

+#include "internal.h"
+
/*
* Must ensure cgroup is pinned (css_get) before calling
* this function. In other words, we cannot call this function
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 364cad6..e817d91 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -38,6 +38,8 @@

#include <asm/irq_regs.h>

+#include "internal.h"
+
struct remote_function_call {
struct task_struct *p;
int (*func)(void *info);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
new file mode 100644
index 0000000..5cbec39
--- /dev/null
+++ b/kernel/events/internal.h
@@ -0,0 +1,129 @@
+#ifndef __PERF_EVENT_INTERNAL__
+#define __PERF_EVENT_INTERNAL__
+
+#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
+
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+ return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static inline u64 perf_clock(void)
+{
+ return local_clock();
+}
+
+extern void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
+/*
+ * Called with IRQs disabled
+ */
+static inline void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
+}
+
+extern void ctx_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type,
+ struct task_struct *task);
+
+static inline void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type,
+ struct task_struct *task)
+{
+ struct perf_event_context *ctx = &cpuctx->ctx;
+
+ ctx_sched_in(ctx, cpuctx, event_type, task);
+}
+
+extern int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info);
+extern u64 perf_event_time(struct perf_event *event);
+
+#ifdef CONFIG_CGROUP_PERF
+extern struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task);
+extern bool perf_cgroup_match(struct perf_event *event);
+extern int
+perf_cgroup_connect(pid_t pid, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader);
+extern void perf_detach_cgroup(struct perf_event *event);
+
+static inline int is_cgroup_event(struct perf_event *event)
+{
+ return event->cgrp != NULL;
+}
+
+extern u64 perf_cgroup_event_time(struct perf_event *event);
+extern void update_cgrp_time_from_event(struct perf_event *event);
+extern void
+update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx);
+extern void
+perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx);
+
+extern void perf_cgroup_sched_out(struct task_struct *task);
+extern void perf_cgroup_sched_in(struct task_struct *task);
+
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+{
+ struct perf_cgroup_info *t;
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ event->shadow_ctx_time = now - t->timestamp;
+}
+
+static inline void
+perf_cgroup_defer_enabled(struct perf_event *event)
+{
+ /*
+ * when the current task's perf cgroup does not match
+ * the event's, we need to remember to call the
+ * perf_mark_enable() function the first time a task with
+ * a matching perf cgroup is scheduled in.
+ */
+ if (is_cgroup_event(event) && !perf_cgroup_match(event))
+ event->cgrp_defer_enabled = 1;
+}
+extern inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx);
+#else
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task) { return NULL; }
+static inline bool perf_cgroup_match(struct perf_event *event) { return true; }
+static inline int
+perf_cgroup_connect(pid_t pid, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader) { return -EINVAL; }
+static inline void perf_detach_cgroup(struct perf_event *event) { }
+static inline int is_cgroup_event(struct perf_event *event) { return 0; }
+static inline u64 perf_cgroup_event_time(struct perf_event *event) { return 0; }
+static inline void update_cgrp_time_from_event(struct perf_event *e) { }
+static inline void
+update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { }
+static inline void
+perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx) { }
+static inline void perf_cgroup_sched_out(struct task_struct *task) { }
+static inline void perf_cgroup_sched_in(struct task_struct *task) { }
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) { }
+static inline void perf_cgroup_defer_enabled(struct perf_event *event) { }
+static inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx) { }
+#endif /* CONFIG_CGROUP_PERF */
+
+extern int get_recursion_context(int *recursion);
+extern inline void put_recursion_context(int *recursion, int rctx);
+
+extern void put_callchain_buffers(void);
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+extern int get_callchain_buffers(void);
+#endif /* __PERF_EVENT_INTERNAL__ */
--
1.7.4.rc2



--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-12 08:48:17

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Wed, 2011-05-11 at 19:09 +0200, Borislav Petkov wrote:

I can't really say I like this move stuff into perf_event.h and then
move it out again dance. Makes it exceedingly hard for me to tell wth
actually happened.

> include/linux/perf_event.h | 132 --------------------------------------------

Compared with:

include/linux/perf_event.h | 126 +++++++++++-
include/linux/perf_event.h | 7 +-

Its very hard to tell if this undoes the exact damage you did earlier.

> kernel/events/callchain.c | 3 +
> kernel/events/cgroup.c | 2 +
> kernel/events/core.c | 2 +
> kernel/events/internal.h | 129 +++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 136 insertions(+), 132 deletions(-)
> create mode 100644 kernel/events/internal.h
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 7978850..6b25452 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -963,7 +963,6 @@ enum event_type_t {
> #ifdef CONFIG_PERF_EVENTS
> extern struct list_head pmus;
> extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
> -extern void perf_pmu_unregister(struct pmu *pmu);

That just doesn't make any sense. If we publish one side of the API we
should also publish the other side.

> extern int perf_num_counters(void);
> extern const char *perf_pmu_name(void);
> @@ -985,8 +984,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
> int cpu,
> struct task_struct *task,
> perf_overflow_handler_t callback);
> -extern u64 perf_event_read_value(struct perf_event *event,
> - u64 *enabled, u64 *running);

While not used, that is a valid part of the API.

>
> struct perf_sample_data {
> u64 type;
> @@ -1152,60 +1149,10 @@ extern int perf_output_begin(struct perf_output_handle *handle,
> struct perf_event *event, unsigned int size,
> int nmi, int sample);
> extern void perf_output_end(struct perf_output_handle *handle);
> -extern void perf_output_copy(struct perf_output_handle *handle,
> - const void *buf, unsigned int len);

idem

> extern int perf_swevent_get_recursion_context(void);
> -extern void perf_swevent_put_recursion_context(int rctx);

Again, creating asymmetry.

> extern void perf_event_enable(struct perf_event *event);
> extern void perf_event_disable(struct perf_event *event);
> extern void perf_event_task_tick(void);

2011-05-12 08:51:48

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Thu, 2011-05-12 at 10:51 +0200, Peter Zijlstra wrote:
> On Wed, 2011-05-11 at 19:09 +0200, Borislav Petkov wrote:

Also please don't cross-post to bouncy lists like this weird amd list
you included.

2011-05-12 10:19:15

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Thu, May 12, 2011 at 04:51:17AM -0400, Peter Zijlstra wrote:
> On Wed, 2011-05-11 at 19:09 +0200, Borislav Petkov wrote:
>
> I can't really say I like this move stuff into perf_event.h and then
> move it out again dance. Makes it exceedingly hard for me to tell wth
> actually happened.
>
> > include/linux/perf_event.h | 132 --------------------------------------------
>
> Compared with:
>
> include/linux/perf_event.h | 126 +++++++++++-
> include/linux/perf_event.h | 7 +-
>
> Its very hard to tell if this undoes the exact damage you did earlier.

The right thing to do would be to redo the patches again with internal.h
in mind.

> > kernel/events/callchain.c | 3 +
> > kernel/events/cgroup.c | 2 +
> > kernel/events/core.c | 2 +
> > kernel/events/internal.h | 129 +++++++++++++++++++++++++++++++++++++++++++
> > 5 files changed, 136 insertions(+), 132 deletions(-)
> > create mode 100644 kernel/events/internal.h
> >
> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > index 7978850..6b25452 100644
> > --- a/include/linux/perf_event.h
> > +++ b/include/linux/perf_event.h
> > @@ -963,7 +963,6 @@ enum event_type_t {
> > #ifdef CONFIG_PERF_EVENTS
> > extern struct list_head pmus;
> > extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
> > -extern void perf_pmu_unregister(struct pmu *pmu);
>
> That just doesn't make any sense. If we publish one side of the API we
> should also publish the other side.

Fair enough. It was unused, therefore I removed it.

> > extern int perf_num_counters(void);
> > extern const char *perf_pmu_name(void);
> > @@ -985,8 +984,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
> > int cpu,
> > struct task_struct *task,
> > perf_overflow_handler_t callback);
> > -extern u64 perf_event_read_value(struct perf_event *event,
> > - u64 *enabled, u64 *running);
>
> While not used, that is a valid part of the API.

Yep, ditto.

> >
> > struct perf_sample_data {
> > u64 type;
> > @@ -1152,60 +1149,10 @@ extern int perf_output_begin(struct perf_output_handle *handle,
> > struct perf_event *event, unsigned int size,
> > int nmi, int sample);
> > extern void perf_output_end(struct perf_output_handle *handle);
> > -extern void perf_output_copy(struct perf_output_handle *handle,
> > - const void *buf, unsigned int len);
>
> idem
>
> > extern int perf_swevent_get_recursion_context(void);
> > -extern void perf_swevent_put_recursion_context(int rctx);
>
> Again, creating asymmetry.

Ok, I won't be able to redo the patches before Mo. due to travel. Also,
I think that you should do the splitting, as I suggested so at the
beginning!

It is only natural since it is your code, only you know exactly what
you want to do with it, how to split, what to export and before we do
a couple of iterations back and forth of me sending patches and you
suggesting what needs to be changed, we could save us a bunch of trouble
and time if you did the split the "right" way :).

Btw, feel free to reuse any of my stuff if you feel like it.

P.S. Sorry for the bouncy ML, we're fixing it.

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-12 10:36:56

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code


* Borislav Petkov <[email protected]> wrote:

> Done, patch below. I've moved all exports local to kernel/events/* to
> internal.h. Branch at
>
> git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git perf-split

Beyond the things Peter noticed, there's also this new bit in perf_event.h:

enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};

this got there from core.c, albeit it's only core.c that uses it.

If then this should move to internal.h - or stay local to core.c.

Thanks,

Ingo

2011-05-12 10:54:55

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Thu, May 12, 2011 at 06:36:44AM -0400, Ingo Molnar wrote:
>
> * Borislav Petkov <[email protected]> wrote:
>
> > Done, patch below. I've moved all exports local to kernel/events/* to
> > internal.h. Branch at
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git perf-split
>
> Beyond the things Peter noticed, there's also this new bit in perf_event.h:
>
> enum event_type_t {
> EVENT_FLEXIBLE = 0x1,
> EVENT_PINNED = 0x2,
> EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
> };
>
> this got there from core.c, albeit it's only core.c that uses it.
>
> If then this should move to internal.h - or stay local to core.c.

event_type_t gets pulled by pushing

ctx_sched_out(..., enum event_type_t event_type)

up into the header since otherwise gcc complains of not seeing the
definition of the third argument.

And yes, it should go to internal.h.

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-12 11:03:36

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code


* Borislav Petkov <[email protected]> wrote:

> On Thu, May 12, 2011 at 06:36:44AM -0400, Ingo Molnar wrote:
> >
> > * Borislav Petkov <[email protected]> wrote:
> >
> > > Done, patch below. I've moved all exports local to kernel/events/* to
> > > internal.h. Branch at
> > >
> > > git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git perf-split
> >
> > Beyond the things Peter noticed, there's also this new bit in perf_event.h:
> >
> > enum event_type_t {
> > EVENT_FLEXIBLE = 0x1,
> > EVENT_PINNED = 0x2,
> > EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
> > };
> >
> > this got there from core.c, albeit it's only core.c that uses it.
> >
> > If then this should move to internal.h - or stay local to core.c.
>
> event_type_t gets pulled by pushing
>
> ctx_sched_out(..., enum event_type_t event_type)
>
> up into the header since otherwise gcc complains of not seeing the
> definition of the third argument.
>
> And yes, it should go to internal.h.

yeah.

Thanks,

Ingo

2011-05-12 14:31:59

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Thu, May 12, 2011 at 12:18:39PM +0200, Borislav Petkov wrote:
> On Thu, May 12, 2011 at 04:51:17AM -0400, Peter Zijlstra wrote:
> > On Wed, 2011-05-11 at 19:09 +0200, Borislav Petkov wrote:
> >
> > I can't really say I like this move stuff into perf_event.h and then
> > move it out again dance. Makes it exceedingly hard for me to tell wth
> > actually happened.
> >
> > > include/linux/perf_event.h | 132 --------------------------------------------
> >
> > Compared with:
> >
> > include/linux/perf_event.h | 126 +++++++++++-
> > include/linux/perf_event.h | 7 +-
> >
> > Its very hard to tell if this undoes the exact damage you did earlier.
>
> The right thing to do would be to redo the patches again with internal.h
> in mind.
>
> > > kernel/events/callchain.c | 3 +
> > > kernel/events/cgroup.c | 2 +
> > > kernel/events/core.c | 2 +
> > > kernel/events/internal.h | 129 +++++++++++++++++++++++++++++++++++++++++++
> > > 5 files changed, 136 insertions(+), 132 deletions(-)
> > > create mode 100644 kernel/events/internal.h
> > >
> > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > > index 7978850..6b25452 100644
> > > --- a/include/linux/perf_event.h
> > > +++ b/include/linux/perf_event.h
> > > @@ -963,7 +963,6 @@ enum event_type_t {
> > > #ifdef CONFIG_PERF_EVENTS
> > > extern struct list_head pmus;
> > > extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
> > > -extern void perf_pmu_unregister(struct pmu *pmu);
> >
> > That just doesn't make any sense. If we publish one side of the API we
> > should also publish the other side.
>
> Fair enough. It was unused, therefore I removed it.
>
> > > extern int perf_num_counters(void);
> > > extern const char *perf_pmu_name(void);
> > > @@ -985,8 +984,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
> > > int cpu,
> > > struct task_struct *task,
> > > perf_overflow_handler_t callback);
> > > -extern u64 perf_event_read_value(struct perf_event *event,
> > > - u64 *enabled, u64 *running);
> >
> > While not used, that is a valid part of the API.
>
> Yep, ditto.
>
> > >
> > > struct perf_sample_data {
> > > u64 type;
> > > @@ -1152,60 +1149,10 @@ extern int perf_output_begin(struct perf_output_handle *handle,
> > > struct perf_event *event, unsigned int size,
> > > int nmi, int sample);
> > > extern void perf_output_end(struct perf_output_handle *handle);
> > > -extern void perf_output_copy(struct perf_output_handle *handle,
> > > - const void *buf, unsigned int len);
> >
> > idem
> >
> > > extern int perf_swevent_get_recursion_context(void);
> > > -extern void perf_swevent_put_recursion_context(int rctx);
> >
> > Again, creating asymmetry.
>
> Ok, I won't be able to redo the patches before Mo. due to travel. Also,
> I think that you should do the splitting, as I suggested so at the
> beginning!

I can take it if you want. I'm currently splitting the buffer
part so I can try to relay the rest as well :)

2011-05-12 15:13:05

by Ming Lin

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Thu, May 12, 2011 at 10:31 PM, Frederic Weisbecker
<[email protected]> wrote:
> On Thu, May 12, 2011 at 12:18:39PM +0200, Borislav Petkov wrote:
>> On Thu, May 12, 2011 at 04:51:17AM -0400, Peter Zijlstra wrote:
>> > On Wed, 2011-05-11 at 19:09 +0200, Borislav Petkov wrote:
>> >
>> > I can't really say I like this move stuff into perf_event.h and then
>> > move it out again dance. Makes it exceedingly hard for me to tell wth
>> > actually happened.
>> >
>> > >  include/linux/perf_event.h |  132 --------------------------------------------
>> >
>> > Compared with:
>> >
>> >  include/linux/perf_event.h |  126 +++++++++++-
>> >  include/linux/perf_event.h |    7 +-
>> >
>> > Its very hard to tell if this undoes the exact damage you did earlier.
>>
>> The right thing to do would be to redo the patches again with internal.h
>> in mind.
>>
>> > >  kernel/events/callchain.c  |    3 +
>> > >  kernel/events/cgroup.c     |    2 +
>> > >  kernel/events/core.c       |    2 +
>> > >  kernel/events/internal.h   |  129 +++++++++++++++++++++++++++++++++++++++++++
>> > >  5 files changed, 136 insertions(+), 132 deletions(-)
>> > >  create mode 100644 kernel/events/internal.h
>> > >
>> > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> > > index 7978850..6b25452 100644
>> > > --- a/include/linux/perf_event.h
>> > > +++ b/include/linux/perf_event.h
>> > > @@ -963,7 +963,6 @@ enum event_type_t {
>> > >  #ifdef CONFIG_PERF_EVENTS
>> > >  extern struct list_head pmus;
>> > >  extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
>> > > -extern void perf_pmu_unregister(struct pmu *pmu);
>> >
>> > That just doesn't make any sense. If we publish one side of the API we
>> > should also publish the other side.
>>
>> Fair enough. It was unused, therefore I removed it.
>>
>> > >  extern int perf_num_counters(void);
>> > >  extern const char *perf_pmu_name(void);
>> > > @@ -985,8 +984,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
>> > >                                  int cpu,
>> > >                                  struct task_struct *task,
>> > >                                  perf_overflow_handler_t callback);
>> > > -extern u64 perf_event_read_value(struct perf_event *event,
>> > > -                                 u64 *enabled, u64 *running);
>> >
>> > While not used, that is a valid part of the API.
>>
>> Yep, ditto.
>>
>> > >
>> > >  struct perf_sample_data {
>> > >         u64                             type;
>> > > @@ -1152,60 +1149,10 @@ extern int perf_output_begin(struct perf_output_handle *handle,
>> > >                              struct perf_event *event, unsigned int size,
>> > >                              int nmi, int sample);
>> > >  extern void perf_output_end(struct perf_output_handle *handle);
>> > > -extern void perf_output_copy(struct perf_output_handle *handle,
>> > > -                             const void *buf, unsigned int len);
>> >
>> > idem
>> >
>> > >  extern int perf_swevent_get_recursion_context(void);
>> > > -extern void perf_swevent_put_recursion_context(int rctx);
>> >
>> > Again, creating asymmetry.
>>
>> Ok, I won't be able to redo the patches before Mo. due to travel. Also,
>> I think that you should do the splitting, as I suggested so at the
>> beginning!
>
> I can take it if you want. I'm currently splitting the buffer
> part so I can try to relay the rest as well :)

Another split could be to split the software pmus out.
perf_swevent, perf_cpu_clock and perf_task_clock.

--
Lin Ming -- Intel Open Source Technology Center

2011-05-14 08:45:28

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Thu, May 12, 2011 at 10:31:52AM -0400, Frederic Weisbecker wrote:
> > Ok, I won't be able to redo the patches before Mo. due to travel. Also,
> > I think that you should do the splitting, as I suggested so at the
> > beginning!
>
> I can take it if you want. I'm currently splitting the buffer
> part so I can try to relay the rest as well :)

Hey Frederic,

yeah, sure, go ahead.

I can take over your preso at LinuxTag instead, if you'd like :)

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-05-14 13:02:51

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Carve out cgroup-related code

On Sat, May 14, 2011 at 10:44:50AM +0200, Borislav Petkov wrote:
> On Thu, May 12, 2011 at 10:31:52AM -0400, Frederic Weisbecker wrote:
> > > Ok, I won't be able to redo the patches before Mo. due to travel. Also,
> > > I think that you should do the splitting, as I suggested so at the
> > > beginning!
> >
> > I can take it if you want. I'm currently splitting the buffer
> > part so I can try to relay the rest as well :)
>
> Hey Frederic,
>
> yeah, sure, go ahead.
>
> I can take over your preso at LinuxTag instead, if you'd like :)

Sure, please do ;)