Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754702AbbHXOdW (ORCPT ); Mon, 24 Aug 2015 10:33:22 -0400 Received: from mga02.intel.com ([134.134.136.20]:4072 "EHLO mga02.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754598AbbHXOdS (ORCPT ); Mon, 24 Aug 2015 10:33:18 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.15,738,1432623600"; d="scan'208";a="774595901" From: Alexander Shishkin To: Peter Zijlstra , Ingo Molnar Cc: linux-kernel@vger.kernel.org, adrian.hunter@intel.com, Arnaldo Carvalho de Melo , Vince Weaver , Stephane Eranian , Johannes Berg , Alexander Shishkin Subject: [PATCH v2 1/6] perf: Introduce extended syscall error reporting Date: Mon, 24 Aug 2015 17:32:55 +0300 Message-Id: <1440426780-27227-2-git-send-email-alexander.shishkin@linux.intel.com> X-Mailer: git-send-email 2.5.0 In-Reply-To: <1440426780-27227-1-git-send-email-alexander.shishkin@linux.intel.com> References: <1440426780-27227-1-git-send-email-alexander.shishkin@linux.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8372 Lines: 284 It has been pointed out several times that perf syscall error reporting leaves a lot to be desired [1]. This patch introduces a fairly simple extension that allows call sites to annotate their error codes with arbitrary strings, which will then be copied to userspace (if they asked for it) along with the module name that produced the error message in JSON format. This way, we can provide both human-readable and machine-parsable information to the user and leave room for extensions in the future, such as file name and line number if kernel debugging is enabled. Each error "site" is referred to by its index, which is folded into an integer error value within the range of [-PERF_ERRNO, -MAX_ERRNO], where PERF_ERRNO is chosen to be below any known error codes, but still leaving enough room to enumerate error sites. 
This way, all the traditional macros will still handle these as error codes and we'd only have to convert them to their original values right before returning to userspace. This way we also don't have to worry about keeping a separate pointer to the error message inside a perf_event. [1] http://marc.info/?l=linux-kernel&m=141470811013082 Signed-off-by: Alexander Shishkin --- include/linux/perf_event.h | 71 ++++++++++++++++++++++++++++++++++ include/uapi/linux/perf_event.h | 8 +++- kernel/events/core.c | 86 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 159 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2027809433..8e37939d21 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -56,6 +56,77 @@ struct perf_guest_info_callbacks { #include #include +#ifndef PERF_MODNAME +#define PERF_MODNAME "perf" +#endif + +/* + * Extended error reporting: annotate an error code with a string + * and a module name to help users diagnose problems with their + * attributes and whatnot. + */ +struct perf_err_site { + const char *message; + const char *owner; + const int code; +}; + +#ifdef CONFIG_PERF_EVENTS + +/* + * Place all occurrences of struct perf_err_site into a special section, + * so that we can find out their offsets, which we'll use to refer back + * to the error sites. + */ +extern const struct perf_err_site __start___perf_err[], __stop___perf_err[]; + +#define __perf_err(__c, __m) ({ \ + static struct perf_err_site \ + __attribute__ ((__section__("__perf_err"))) \ + __err_site = { \ + .message = (__m), \ + .owner = PERF_MODNAME, \ + .code = __builtin_constant_p((__c)) ? \ + (__c) : -EINVAL, \ + }; \ + &__err_site; \ +}) + +/* + * Use part of the [-1, -MAX_ERRNO] errno range for perf's extended error + * reporting. Anything within [-PERF_ERRNO, -MAX_ERRNO] is an index of a + * perf_err_site structure within __perf_err section. 
3k should be enough + * for everybody, but let's add a boot-time warning just in case it overflows + * one day. + */ +#define PERF_ERRNO 1024 + +static inline int perf_errno(const struct perf_err_site *site) +{ + unsigned long err = site - __start___perf_err; + + return -(int)err - PERF_ERRNO; +} + +static inline const struct perf_err_site *perf_errno_to_site(int err) +{ + return __start___perf_err - err - PERF_ERRNO; +} + +#ifdef MODULE +/* + * Module support is a tad trickier, but far from rocket surgery. Let's + * bypass it for now. + */ +#define perf_err(__c, __m) (__c) +#else +#define perf_err(__c, __m) (perf_errno(__perf_err((__c), (__m)))) +#endif + +#define PERF_ERR_PTR(__e, __m) (ERR_PTR(perf_err(__e, __m))) + +#endif + struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 2881145cda..73b6919954 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -264,6 +264,7 @@ enum perf_event_read_format { /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* add: perf_err */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -375,7 +376,12 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; - __u32 __reserved_2; /* align to __u64 */ + + /* + * Extended error reporting buffer + */ + __u32 perf_err_size; + __u64 perf_err; }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) diff --git a/kernel/events/core.c b/kernel/events/core.c index ae16867670..c6f43d1d5f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -49,6 +49,79 @@ #include +static bool extended_reporting_enabled(struct perf_event_attr *attr) +{ + if (attr->size >= PERF_ATTR_SIZE_VER6 && + attr->perf_err_size > 0) + return true; + + return false; +} + +/* + * Provide a JSON 
formatted error report to the user if they asked for it. + */ +static void perf_error_report_site(struct perf_event_attr *attr, + const struct perf_err_site *site) +{ + unsigned long len; + char *buffer; + + if (!site || !extended_reporting_enabled(attr)) + return; + + /* in case of nested perf_err()s, which you shouldn't really do */ + while (site->code <= -PERF_ERRNO) + site = perf_errno_to_site(site->code); + + buffer = kasprintf(GFP_KERNEL, + "{\n" + "\t\"code\": %d,\n" + "\t\"module\": \"%s\",\n" + "\t\"message\": \"%s\"\n" + "}\n", + site->code, site->owner, site->message + ); + if (!buffer) + return; + + /* trim the buffer to the supplied boundary */ + len = strlen(buffer); + if (len >= attr->perf_err_size) { + len = attr->perf_err_size - 1; + buffer[len] = 0; + } + + if (copy_to_user((void __user *)attr->perf_err, buffer, len + 1)) { + /* if we failed to copy once, don't bother later */ + attr->perf_err_size = 0; + } + + kfree(buffer); +} + +/* + * Synchronous version of perf_err(), for the paths where we return immediately + * back to userspace. 
+ */ +#define perf_err_sync(__attr, __c, __m) ({ \ + perf_error_report_site(__attr, __perf_err((__c), (__m))); \ + (__c); \ +}) + +static int perf_error_report(struct perf_event_attr *attr, int err) +{ + const struct perf_err_site *site; + + if (err > -PERF_ERRNO) + return err; + + site = perf_errno_to_site(err); + perf_error_report_site(attr, site); + + return site->code; +} + static struct workqueue_struct *perf_wq; typedef int (*remote_function_f)(void *); @@ -3904,7 +3977,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) ret = perf_read_hw(event, buf, count); perf_event_ctx_unlock(event, ctx); - return ret; + return perf_error_report(&event->attr, ret); } static unsigned int perf_poll(struct file *file, poll_table *wait) @@ -4152,7 +4225,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); - return ret; + return perf_error_report(&event->attr, ret); } #ifdef CONFIG_COMPAT @@ -4752,7 +4825,7 @@ aux_unlock: if (event->pmu->event_mapped) event->pmu->event_mapped(event); - return ret; + return perf_error_report(&event->attr, ret); } static int perf_fasync(int fd, struct file *filp, int on) @@ -4766,7 +4839,7 @@ static int perf_fasync(int fd, struct file *filp, int on) mutex_unlock(&inode->i_mutex); if (retval < 0) - return retval; + return perf_error_report(&event->attr, retval); return 0; } @@ -8352,7 +8425,7 @@ err_group_fd: fdput(group); err_fd: put_unused_fd(event_fd); - return err; + return perf_error_report(&attr, err); } /** @@ -9130,6 +9203,9 @@ void __init perf_event_init(void) */ BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head)) != 1024); + + /* Too many error sites; see the comment at PERF_ERRNO definition */ + WARN_ON(__stop___perf_err - __start___perf_err > MAX_ERRNO - PERF_ERRNO); } ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, -- 2.5.0 -- To unsubscribe from this list: 
send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/