The following patchset adds address masks to existing perf hardware
breakpoint mechanism to allow trapping on an address range (currently
only single address) on supported architectures.
perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
beyond) is provided, and perf tool has been extended to do:
$ perf stat -e mem:0x1000:w:0xf a.out
^^^
"don't care" bit mask
which will count writes to [0x1000 ~ 0x1010)
Jacob Shin (2):
perf: Add hardware breakpoint address mask
perf, x86: AMD implementation for hardware breakpoint address mask
Suravee Suthikulpanit (3):
perf tools: Add breakpoint address mask to the mem event parser
perf tools: Add breakpoint address mask syntax to perf list and
documentation
perf tools: Add breakpoint address mask test case to
tests/parse-events
arch/Kconfig | 4 ++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/cpufeature.h | 2 ++
arch/x86/include/asm/debugreg.h | 7 ++++++
arch/x86/include/asm/hw_breakpoint.h | 6 ++++++
arch/x86/include/uapi/asm/msr-index.h | 6 ++++++
arch/x86/kernel/cpu/amd.c | 19 +++++++++++++++++
arch/x86/kernel/hw_breakpoint.c | 5 +++++
include/linux/hw_breakpoint.h | 6 ++++++
include/uapi/linux/perf_event.h | 5 ++++-
kernel/events/hw_breakpoint.c | 3 +++
tools/perf/Documentation/perf-record.txt | 14 ++++++++----
tools/perf/tests/parse-events.c | 34 ++++++++++++++++++++++++++++++
tools/perf/util/parse-events.c | 5 +++--
tools/perf/util/parse-events.h | 2 +-
tools/perf/util/parse-events.y | 14 ++++++++++--
16 files changed, 123 insertions(+), 10 deletions(-)
--
1.7.9.5
From: Suravee Suthikulpanit <[email protected]>
Allow perf tool to pass in breakpoint address mask to match an address
range, i.e.:
$ perf stat -e mem:0x1000:w:0xf a.out
Will count writes to [0x1000 ~ 0x1010)
Signed-off-by: Suravee Suthikulpanit <[email protected]>
Signed-off-by: Jacob Shin <[email protected]>
---
tools/perf/util/parse-events.c | 3 ++-
tools/perf/util/parse-events.h | 2 +-
tools/perf/util/parse-events.y | 14 ++++++++++++--
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6c8bb0f..0744895 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -508,12 +508,13 @@ do { \
}
int parse_events_add_breakpoint(struct list_head **list, int *idx,
- void *ptr, char *type)
+ void *ptr, char *type, void *msk)
{
struct perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
attr.bp_addr = (unsigned long) ptr;
+ attr.bp_addr_mask = (unsigned long) msk;
if (parse_breakpoint_type(type, &attr))
return -EINVAL;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8a48593..83920cc 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -92,7 +92,7 @@ int parse_events_add_numeric(struct list_head **list, int *idx,
int parse_events_add_cache(struct list_head **list, int *idx,
char *type, char *op_result1, char *op_result2);
int parse_events_add_breakpoint(struct list_head **list, int *idx,
- void *ptr, char *type);
+ void *ptr, char *type, void *msk);
int parse_events_add_pmu(struct list_head **list, int *idx,
char *pmu , struct list_head *head_config);
void parse_events__set_leader(char *name, struct list_head *list);
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index afc44c1..cfb0877 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -263,13 +263,23 @@ PE_NAME_CACHE_TYPE
}
event_legacy_mem:
+PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP ':' PE_VALUE sep_dc
+{
+ struct parse_events_evlist *data = _data;
+ struct list_head *list = NULL;
+
+ ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+ (void *) $2, $4, (void *) $6));
+ $$ = list;
+}
+|
PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
{
struct parse_events_evlist *data = _data;
struct list_head *list = NULL;
ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
- (void *) $2, $4));
+ (void *) $2, $4, NULL));
$$ = list;
}
|
@@ -279,7 +289,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
struct list_head *list = NULL;
ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
- (void *) $2, NULL));
+ (void *) $2, NULL, NULL));
$$ = list;
}
--
1.7.9.5
Some architectures (for us, AMD Family 16h) allow for "don't care" bit
mask to further qualify a hardware breakpoint address, in order to
trap on range of addresses. Update perf uapi to add bp_addr_mask field
and define HAVE_HW_BREAKPOINT_ADDR_MASK.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/Kconfig | 4 ++++
include/linux/hw_breakpoint.h | 6 ++++++
include/uapi/linux/perf_event.h | 5 ++++-
kernel/events/hw_breakpoint.c | 3 +++
4 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 1455579..8423e9c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -249,6 +249,10 @@ config HAVE_HW_BREAKPOINT
bool
depends on PERF_EVENTS
+config HAVE_HW_BREAKPOINT_ADDR_MASK
+ bool
+ depends on HAVE_HW_BREAKPOINT
+
config HAVE_MIXED_BREAKPOINTS_REGS
bool
depends on HAVE_HW_BREAKPOINT
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 0464c85..9384201 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -84,6 +84,12 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
return &bp->hw.info;
}
+#ifdef CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK
+extern int arch_has_hw_breakpoint_addr_mask(void);
+#else
+static inline int arch_has_hw_breakpoint_addr_mask(void) { return 0; }
+#endif /* CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK */
+
#else /* !CONFIG_HAVE_HW_BREAKPOINT */
static inline int __init init_hw_breakpoint(void) { return 0; }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index fb104e5..e22e1d1 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -286,7 +286,10 @@ struct perf_event_attr {
__u64 config1; /* extension of config */
};
union {
- __u64 bp_len;
+ struct {
+ __u32 bp_len;
+ __u32 bp_addr_mask;
+ };
__u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index a64f8ae..e186a46 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -612,6 +612,9 @@ static int hw_breakpoint_add(struct perf_event *bp, int flags)
if (!(flags & PERF_EF_START))
bp->hw.state = PERF_HES_STOPPED;
+ if (bp->attr.bp_addr_mask && !arch_has_hw_breakpoint_addr_mask())
+ return -EOPNOTSUPP;
+
return arch_install_hw_breakpoint(bp);
}
--
1.7.9.5
From: Suravee Suthikulpanit <[email protected]>
Signed-off-by: Suravee Suthikulpanit <[email protected]>
Signed-off-by: Jacob Shin <[email protected]>
---
tools/perf/tests/parse-events.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 88e2f44..1ade502 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -372,6 +372,32 @@ static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist)
return test__checkevent_breakpoint_rw(evlist);
}
+static int test__checkevent_breakpoint_rw_addrmsk(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong bp_addr_mask",
+ 0 == evsel->attr.bp_addr_mask);
+
+ return test__checkevent_breakpoint_rw(evlist);
+}
+
+static int test__checkevent_breakpoint_x_addrmsk_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "mem:0:x:0:k"));
+ TEST_ASSERT_VAL("wrong bp_addr_mask",
+ 0 == evsel->attr.bp_addr_mask);
+
+ return test__checkevent_breakpoint_x(evlist);
+}
+
static int test__checkevent_pmu(struct perf_evlist *evlist)
{
@@ -1187,6 +1213,14 @@ static struct evlist_test test__events[] = {
.name = "{cycles:G,cache-misses:H}:uG",
.check = test__group_gh4,
},
+ [38] = {
+ .name = "mem:0:rw:0",
+ .check = test__checkevent_breakpoint_rw_addrmsk,
+ },
+ [39] = {
+ .name = "mem:0:x:0:k",
+ .check = test__checkevent_breakpoint_x_addrmsk_modifier,
+ },
};
static struct evlist_test test__events_pmu[] = {
--
1.7.9.5
Implement hardware breakpoint address mask for AMD Family 16h (and
any other future) processors. CPUID feature bit indicates the hardware
support for DRn_ADDR_MASK MSRs.
Signed-off-by: Jacob Shin <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/cpufeature.h | 2 ++
arch/x86/include/asm/debugreg.h | 7 +++++++
arch/x86/include/asm/hw_breakpoint.h | 6 ++++++
arch/x86/include/uapi/asm/msr-index.h | 6 ++++++
arch/x86/kernel/cpu/amd.c | 19 +++++++++++++++++++
arch/x86/kernel/hw_breakpoint.c | 5 +++++
7 files changed, 46 insertions(+)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 70c0f3d..eee63f8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -65,6 +65,7 @@ config X86
select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT
+ select HAVE_HW_BREAKPOINT_ADDR_MASK
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..eb4f3a7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -315,6 +316,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
+#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT)
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 4b528a9..bb48949 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -114,5 +114,12 @@ static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */
+#ifdef CONFIG_CPU_SUP_AMD
+extern void set_dr_addr_mask(u32 mask, int dr);
+#else
+static inline void set_dr_addr_mask(u32 mask, int dr)
+{
+}
+#endif
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index ef1c4d2..c939415 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -14,6 +14,7 @@ struct arch_hw_breakpoint {
unsigned long address;
u8 len;
u8 type;
+ u32 mask;
};
#include <linux/kdebug.h>
@@ -72,4 +73,9 @@ extern int arch_bp_generic_fields(int x86_len, int x86_type,
extern struct pmu perf_ops_bp;
+static inline int arch_has_hw_breakpoint_addr_mask(void)
+{
+ return cpu_has_bpext;
+}
+
#endif /* _I386_HW_BREAKPOINT_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bf7bb68..b0b9335 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -196,6 +196,12 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 16h MSRs */
+#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
+#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
+
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fa96eb0..aadc499 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -910,3 +910,22 @@ bool cpu_has_amd_erratum(const int *erratum)
}
EXPORT_SYMBOL_GPL(cpu_has_amd_erratum);
+
+void set_dr_addr_mask(u32 mask, int dr)
+{
+ if (!cpu_has_bpext)
+ return;
+
+ switch (dr) {
+ case 0:
+ wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
+ break;
+ case 1:
+ case 2:
+ case 3:
+ wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+ break;
+ default:
+ break;
+ }
+}
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 02f0763..f8bf2df 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -121,6 +121,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
return -EBUSY;
+ set_dr_addr_mask(info->mask, i);
+
set_debugreg(info->address, i);
__this_cpu_write(cpu_debugreg[i], info->address);
@@ -163,6 +165,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
*dr7 &= ~__encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
+
+ set_dr_addr_mask(0, i);
}
static int get_hbp_len(u8 hbp_len)
@@ -254,6 +258,7 @@ static int arch_build_bp_info(struct perf_event *bp)
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
info->address = bp->attr.bp_addr;
+ info->mask = bp->attr.bp_addr_mask;
/* Type */
switch (bp->attr.bp_type) {
--
1.7.9.5
From: Suravee Suthikulpanit <[email protected]>
Signed-off-by: Suravee Suthikulpanit <[email protected]>
Signed-off-by: Jacob Shin <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 14 ++++++++++----
tools/perf/util/parse-events.c | 2 +-
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d4da111..48a7587 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -33,13 +33,19 @@ OPTIONS
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
- - a hardware breakpoint event in the form of '\mem:addr[:access]'
- where addr is the address in memory you want to break in.
- Access is the memory access type (read, write, execute) it can
- be passed as follows: '\mem:addr[:[r][w][x]]'.
+ - a hardware breakpoint event in the form of
+ '\mem:addr[:access][:addr_mask]' where addr is the address in
+ memory you want to break in. Access is the memory access type (read,
+ write, execute) it can be passed as follows: '\mem:addr[:[r][w][x]]'.
+ addr_mask is the "don't care" bit mask to further qualify the given
+ addr, to break in on accesses to an address range.
+
If you want to profile read-write accesses in 0x1000, just set
'mem:0x1000:rw'.
+ If you want to profile write accesses in [0x1000 ~ 0x1010), just set
+ 'mem:0x1000:w:0xf'. (Only supported on some hardware)
+
--filter=<filter>::
Event filter.
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 0744895..93d0c9c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1148,7 +1148,7 @@ void print_events(const char *event_glob, bool name_only)
printf("\n");
printf(" %-50s [%s]\n",
- "mem:<addr>[:access]",
+ "mem:<addr>[:access][:addr_mask]",
event_type_descriptors[PERF_TYPE_BREAKPOINT]);
printf("\n");
}
--
1.7.9.5
On Tue, Apr 09, 2013 at 12:21:48PM -0500, Jacob Shin wrote:
> The following patchset adds address masks to existing perf hardware
> breakpoint mechanism to allow trapping on an address range (currently
> only single address) on supported architectures.
>
> perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
> beyond) is provided, and perf tool has been extended to do:
>
> $ perf stat -e mem:0x1000:w:0xf a.out
> ^^^
> "don't care" bit mask
>
> which will count writes to [0x1000 ~ 0x1010)
Ping .. Ingo?
>
> Jacob Shin (2):
> perf: Add hardware breakpoint address mask
> perf, x86: AMD implementation for hardware breakpoint address mask
>
> Suravee Suthikulpanit (3):
> perf tools: Add breakpoint address mask to the mem event parser
> perf tools: Add breakpoint address mask syntax to perf list and
> documentation
> perf tools: Add breakpoint address mask test case to
> tests/parse-events
>
> arch/Kconfig | 4 ++++
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/cpufeature.h | 2 ++
> arch/x86/include/asm/debugreg.h | 7 ++++++
> arch/x86/include/asm/hw_breakpoint.h | 6 ++++++
> arch/x86/include/uapi/asm/msr-index.h | 6 ++++++
> arch/x86/kernel/cpu/amd.c | 19 +++++++++++++++++
> arch/x86/kernel/hw_breakpoint.c | 5 +++++
> include/linux/hw_breakpoint.h | 6 ++++++
> include/uapi/linux/perf_event.h | 5 ++++-
> kernel/events/hw_breakpoint.c | 3 +++
> tools/perf/Documentation/perf-record.txt | 14 ++++++++----
> tools/perf/tests/parse-events.c | 34 ++++++++++++++++++++++++++++++
> tools/perf/util/parse-events.c | 5 +++--
> tools/perf/util/parse-events.h | 2 +-
> tools/perf/util/parse-events.y | 14 ++++++++++--
> 16 files changed, 123 insertions(+), 10 deletions(-)
>
> --
> 1.7.9.5
>
On Tue, Apr 09, 2013 at 12:21:48PM -0500, Jacob Shin wrote:
> The following patchset adds address masks to existing perf hardware
> breakpoint mechanism to allow trapping on an address range (currently
> only single address) on supported architectures.
>
> perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
> beyond) is provided, and perf tool has been extended to do:
>
> $ perf stat -e mem:0x1000:w:0xf a.out
> ^^^
> "don't care" bit mask
>
> which will count writes to [0x1000 ~ 0x1010)
>
> Jacob Shin (2):
> perf: Add hardware breakpoint address mask
> perf, x86: AMD implementation for hardware breakpoint address mask
>
> Suravee Suthikulpanit (3):
> perf tools: Add breakpoint address mask to the mem event parser
> perf tools: Add breakpoint address mask syntax to perf list and
> documentation
> perf tools: Add breakpoint address mask test case to
> tests/parse-events
hi,
the perf tool patches look ok.. thanks for tests! ;)
jirka
* Jacob Shin <[email protected]> wrote:
> On Tue, Apr 09, 2013 at 12:21:48PM -0500, Jacob Shin wrote:
> > The following patchset adds address masks to existing perf hardware
> > breakpoint mechanism to allow trapping on an address range (currently
> > only single address) on supported architectures.
> >
> > perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
> > beyond) is provided, and perf tool has been extended to do:
> >
> > $ perf stat -e mem:0x1000:w:0xf a.out
> > ^^^
> > "don't care" bit mask
> >
> > which will count writes to [0x1000 ~ 0x1010)
>
> Ping .. Ingo?
breakpoint patches usually come to me through (or with the acks of) Frederic
and/or Oleg.
Frederic, Oleg, mind having a look?
Thanks,
Ingo
On Tue, Apr 16, 2013 at 11:36:20AM +0200, Ingo Molnar wrote:
>
> * Jacob Shin <[email protected]> wrote:
>
> > On Tue, Apr 09, 2013 at 12:21:48PM -0500, Jacob Shin wrote:
> > > The following patchset adds address masks to existing perf hardware
> > > breakpoint mechanism to allow trapping on an address range (currently
> > > only single address) on supported architectures.
> > >
> > > perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
> > > beyond) is provided, and perf tool has been extended to do:
> > >
> > > $ perf stat -e mem:0x1000:w:0xf a.out
> > > ^^^
> > > "don't care" bit mask
> > >
> > > which will count writes to [0x1000 ~ 0x1010)
> >
> > Ping .. Ingo?
>
> breakpoint patches usually come to me through (or with the acks of) Frederic
> and/or Oleg.
>
> Frederic, Oleg, mind having a look?
Hi Frederic, Oleg. Could one of you take a look over this patchset
when you get the chance? Thank you in advance for taking the time!
Hi Jacob,
On 04/18, Jacob Shin wrote:
>
> Hi Frederic, Oleg. Could one of you take a look over this patchset
> when you get the chance? Thank you in advance for taking the time!
Not sure I can really help ;) But I'll try to study these patches on
weekend.
Could you send me (privately) mbox with the patches/discussion ?
Oleg.
On 04/09, Jacob Shin wrote:
>
> @@ -612,6 +612,9 @@ static int hw_breakpoint_add(struct perf_event *bp, int flags)
> if (!(flags & PERF_EF_START))
> bp->hw.state = PERF_HES_STOPPED;
>
> + if (bp->attr.bp_addr_mask && !arch_has_hw_breakpoint_addr_mask())
> + return -EOPNOTSUPP;
> +
This is called by sched_in... Isn't it "too late" ?
Perhaps arch_validate_hwbkpt_settings() should validate mask/cpu_has_bpext?
Oleg.
On 04/09, Jacob Shin wrote:
>
> The following patchset adds address masks to existing perf hardware
> breakpoint mechanism to allow trapping on an address range (currently
> only single address) on supported architectures.
>
> perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
> beyond) is provided, and perf tool has been extended to do:
>
> $ perf stat -e mem:0x1000:w:0xf a.out
> ^^^
> "don't care" bit mask
>
> which will count writes to [0x1000 ~ 0x1010)
Please help me understand...
Assuming that cpu_has_bpext == T, suppose that
bp_addr = 0x1001;
bp_bp_addr_mask = 0xf;
Is it the same as 0x1000/0xf above?
IOW, what exactly this mask means? I guess, mem:ADDR:w:MASK
should trigger the trap if CPU writes to the addr and
(addr & ~MASK) == (ADDR & ~MASK)
correct?
And does attr.bp_len "contribute" to the mask?
I mean, if bp_len == X86_BREAKPOINT_LEN_8, does this mean that
bp_bp_addr_mask and (bp_bp_addr_mask | 7) have the same effect?
Oleg.
On Sat, Apr 20, 2013 at 06:22:23PM +0200, Oleg Nesterov wrote:
> On 04/09, Jacob Shin wrote:
> >
> > @@ -612,6 +612,9 @@ static int hw_breakpoint_add(struct perf_event *bp, int flags)
> > if (!(flags & PERF_EF_START))
> > bp->hw.state = PERF_HES_STOPPED;
> >
> > + if (bp->attr.bp_addr_mask && !arch_has_hw_breakpoint_addr_mask())
> > + return -EOPNOTSUPP;
> > +
>
> This is called by sched_in... Isn't it "too late" ?
>
> Perhaps arch_validate_hwbkpt_settings() should validate mask/cpu_has_bpext?
Ah, yes okay. Should I do this for all the archs that HAVE_HW_BREAKPOINT ?
Or is creating HAVE_HW_BREAKPOINT_ADDR_MASK and in validate_hw_breakpoint:
#ifndef HAVE_HW_BREAKPOINT_ADDR_MASK
if (bp->attr.bp_addr_mask)
return -EOPNOTSUPP;
#endif
Okay to do?
On Sat, Apr 20, 2013 at 06:53:34PM +0200, Oleg Nesterov wrote:
> On 04/09, Jacob Shin wrote:
> >
> > The following patchset adds address masks to existing perf hardware
> > breakpoint mechanism to allow trapping on an address range (currently
> > only single address) on supported architectures.
> >
> > perf uapi is updated, x86 AMD implementation (for AMD Family 16h and
> > beyond) is provided, and perf tool has been extended to do:
> >
> > $ perf stat -e mem:0x1000:w:0xf a.out
> > ^^^
> > "don't care" bit mask
> >
> > which will count writes to [0x1000 ~ 0x1010)
>
> Please help me understand...
>
> Assuming that cpu_has_bpext == T, suppose that
>
> bp_addr = 0x1001;
> bp_bp_addr_mask = 0xf;
>
> Is it the same as 0x1000/0xf above?
>
> IOW, what exactly this mask means? I guess, mem:ADDR:w:MASK
> should trigger the trap if CPU writes to the addr and
>
> (addr & ~MASK) == (ADDR & ~MASK)
>
> correct?
Yes that is correct.
>
> And does attr.bp_len "contribute" to the mask?
>
> I mean, if bp_len == X86_BREAKPOINT_LEN_8, does this mean that
> bp_bp_addr_mask and (bp_bp_addr_mask | 7) have the same effect?
Yes it has the same effect.
Thanks,
-Jacob
On 04/20, Jacob Shin wrote:
>
> On Sat, Apr 20, 2013 at 06:22:23PM +0200, Oleg Nesterov wrote:
> > On 04/09, Jacob Shin wrote:
> > >
> > > @@ -612,6 +612,9 @@ static int hw_breakpoint_add(struct perf_event *bp, int flags)
> > > if (!(flags & PERF_EF_START))
> > > bp->hw.state = PERF_HES_STOPPED;
> > >
> > > + if (bp->attr.bp_addr_mask && !arch_has_hw_breakpoint_addr_mask())
> > > + return -EOPNOTSUPP;
> > > +
> >
> > This is called by sched_in... Isn't it "too late" ?
> >
> > Perhaps arch_validate_hwbkpt_settings() should validate mask/cpu_has_bpext?
>
> Ah, yes okay. Should I do this for all the archs that HAVE_HW_BREAKPOINT ?
>
> Or is creating HAVE_HW_BREAKPOINT_ADDR_MASK and in validate_hw_breakpoint:
>
> #ifndef HAVE_HW_BREAKPOINT_ADDR_MASK
> if (bp->attr.bp_addr_mask)
> return -EOPNOTSUPP;
> #endif
Or a __weak function overridden in amd.c. I do not know what would be
better, up to you.
Hmm. But this patch already defines arch_has_hw_breakpoint_addr_mask()
for any arch, so validate_hw_breakpoint() can just use it?
Oleg.
On 04/20, Jacob Shin wrote:
>
> On Sat, Apr 20, 2013 at 06:53:34PM +0200, Oleg Nesterov wrote:
> >
> > And does attr.bp_len "contribute" to the mask?
> >
> > I mean, if bp_len == X86_BREAKPOINT_LEN_8, does this mean that
> > bp_bp_addr_mask and (bp_bp_addr_mask | 7) have the same effect?
>
> Yes it has the same effect.
OK, thanks...
So this is the "natural" extension. Given that currently bp_addr
should be aligned, bp_len could be already bp_mask but I guess it
is too late to change this, so we need another field.
Hmm. Perhaps arch_has_hw_breakpoint_addr_mask(void) should be turned
into arch_validate_hw_breakpoint_addr_mask(bp) which should also
check that (bp_addr & bp_addr_mask) == 0. But I won't insist.
Oleg.
Well, I am not going to argue, just the question...
On 04/09, Jacob Shin wrote:
>
> $ perf stat -e mem:0x1000:w:0xf a.out
perhaps, say, perf -e mem:0x1000/0xf:w will look better?
No, I do not understand .y so I do not know if it is simple to
implement or not ;)
Hmm. I did "grep bp_len tools/perf" and it seems that there is
no way to specify bp_len ? Looks like it is always LEN_4...
Once again, I am just asking.
Oleg.
Not a comment, but the question...
On 04/09, Jacob Shin wrote:
>
> --- a/arch/x86/include/asm/hw_breakpoint.h
> +++ b/arch/x86/include/asm/hw_breakpoint.h
> @@ -14,6 +14,7 @@ struct arch_hw_breakpoint {
> unsigned long address;
> u8 len;
> u8 type;
> + u32 mask;
> };
...
> @@ -254,6 +258,7 @@ static int arch_build_bp_info(struct perf_event *bp)
> struct arch_hw_breakpoint *info = counter_arch_bp(bp);
>
> info->address = bp->attr.bp_addr;
> + info->mask = bp->attr.bp_addr_mask;
OK, this matches the usage of info->address so I think this change
is right.
But otoh, why do we need info->address (or mask added by this patch)?
we could use bp->attr.bp_addr instead. arch_hw_breakpoint could have
a single filed = "type | len" for encode_dr7().
Yes, off-topic, sorry for noise.
Oleg.
On Sun, Apr 21, 2013 at 07:02:02PM +0200, Oleg Nesterov wrote:
> On 04/20, Jacob Shin wrote:
> >
> > On Sat, Apr 20, 2013 at 06:53:34PM +0200, Oleg Nesterov wrote:
> > >
> > > And does attr.bp_len "contribute" to the mask?
> > >
> > > I mean, if bp_len == X86_BREAKPOINT_LEN_8, does this mean that
> > > bp_bp_addr_mask and (bp_bp_addr_mask | 7) have the same effect?
> >
> > Yes it has the same effect.
>
> OK, thanks...
>
> So this is the "natural" extension. Given that currently bp_addr
> should be aligned, bp_len could be already bp_mask but I guess it
> is too late to change this, so we need another field.
>
> Hmm. Perhaps arch_has_hw_breakpoint_addr_mask(void) should be turned
> into arch_validate_hw_breakpoint_addr_mask(bp) which should also
> check that (bp_addr & bp_addr_mask) == 0. But I won't insist.
Yes I can do that .. in that case should the Kconfig
CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK go away, and in every non-x86
hw_breakpoint.c do:
bool arch_validate_hw_breakpoint_addr_mask(struct perf_event *bp)
{
return false;
}
?
Or keep CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK and in
include/linux/hw_breakpoint.h do:
#ifndef CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK
static inline bool arch_validate_hw_breakpoint_addr_mask(struct perf_event *bp)
{
return falase;
}
#endif
?
Thanks,
On Mon, Apr 22, 2013 at 04:37:15PM -0500, Jacob Shin wrote:
> On Sun, Apr 21, 2013 at 07:02:02PM +0200, Oleg Nesterov wrote:
> > On 04/20, Jacob Shin wrote:
> > >
> > > On Sat, Apr 20, 2013 at 06:53:34PM +0200, Oleg Nesterov wrote:
> > > >
> > > > And does attr.bp_len "contribute" to the mask?
> > > >
> > > > I mean, if bp_len == X86_BREAKPOINT_LEN_8, does this mean that
> > > > bp_bp_addr_mask and (bp_bp_addr_mask | 7) have the same effect?
> > >
> > > Yes it has the same effect.
> >
> > OK, thanks...
> >
> > So this is the "natural" extension. Given that currently bp_addr
> > should be aligned, bp_len could be already bp_mask but I guess it
> > is too late to change this, so we need another field.
> >
> > Hmm. Perhaps arch_has_hw_breakpoint_addr_mask(void) should be turned
> > into arch_validate_hw_breakpoint_addr_mask(bp) which should also
> > check that (bp_addr & bp_addr_mask) == 0. But I won't insist.
>
> Yes I can do that .. in that case should the Kconfig
> CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK go away, and in every non-x86
> hw_breakpoint.c do:
>
> bool arch_validate_hw_breakpoint_addr_mask(struct perf_event *bp)
> {
> return false;
Sorry, of course what I meant was,
return bp->attr.bp_addr_mask == 0;
> }
>
> ?
>
> Or keep CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK and in
> include/linux/hw_breakpoint.h do:
>
> #ifndef CONFIG_HAVE_HW_BREAKPOINT_ADDR_MASK
> static inline bool arch_validate_hw_breakpoint_addr_mask(struct perf_event *bp)
> {
> return falase;
here too.
> }
> #endif
>
> ?
>
> Thanks,
Just reading your other email, you said a __weak function would suffice.
So I'll do that .. sorry for answering my own question.
I'll send out a revised patchset sometime later tonight ..
Thanks again!
On Sun, Apr 21, 2013 at 07:19:21PM +0200, Oleg Nesterov wrote:
> Not a comment, but the question...
>
> On 04/09, Jacob Shin wrote:
> >
> > --- a/arch/x86/include/asm/hw_breakpoint.h
> > +++ b/arch/x86/include/asm/hw_breakpoint.h
> > @@ -14,6 +14,7 @@ struct arch_hw_breakpoint {
> > unsigned long address;
> > u8 len;
> > u8 type;
> > + u32 mask;
> > };
> ...
> > @@ -254,6 +258,7 @@ static int arch_build_bp_info(struct perf_event *bp)
> > struct arch_hw_breakpoint *info = counter_arch_bp(bp);
> >
> > info->address = bp->attr.bp_addr;
> > + info->mask = bp->attr.bp_addr_mask;
>
> OK, this matches the usage of info->address so I think this change
> is right.
>
> But otoh, why do we need info->address (or mask added by this patch)?
> we could use bp->attr.bp_addr instead. arch_hw_breakpoint could have
> a single filed = "type | len" for encode_dr7().
I understood this as maybe remapping arch independant uapi struct into
x86 specific struct. I guess to future proof in cause uapi interfaces
change.
>
> Yes, off-topic, sorry for noise.
>
> Oleg.
>
>