* Linus Torvalds <[email protected]> wrote:
> And there's quite a lot of them. Even in my (fairly small) config I use on
> my desktop. And the first warnings I see are in x86 code:
>
> arch/x86/kernel/traps.c:405:16: warning: implicit unsigned widening
> cast of a '~' expression
> arch/x86/kernel/cpu/perf_event_p4.c:912:15: warning: implicit unsigned
> widening cast of a '~' expression
Hm, the perf_event_p4.c code is indeed confused.
I think the bug is real but probably benign in effect: we allow narrower
values into the MSR register than probably intended. Only a couple of low
bits are reserved AFAICS.
Here's an (untested!) patch that tries to untangle it all: it just moves
to clean 64-bit types everywhere - these MSRs are 64-bit wide regardless
of whether we run on 32-bit or not.
Would be nice if someone with a working P4 could test it - Cyrill? [It
should also be double checked whether the high words are really not
reserved and can be written to ...]
Thanks,
Ingo
---->
Linus, while extending integer type extension checks in the sparse static
code checker, found fragile patterns of mixed signed/unsigned
64-bit/32-bit integer use in perf_events_p4.c.
The relevant hardware register ABI is 64 bit wide on 32-bit kernels as
well, so clean it all up a bit, remove unnecessary casts, and make sure we
use 64-bit unsigned integers in these places.
Signed-off-by: Ingo Molnar <[email protected]>
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 4f7e67e..85e13cc 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -24,45 +24,45 @@
#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
-#define P4_ESCR_EVENT_MASK 0x7e000000U
+#define P4_ESCR_EVENT_MASK 0x7e000000ULL
#define P4_ESCR_EVENT_SHIFT 25
-#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
+#define P4_ESCR_EVENTMASK_MASK 0x01fffe00ULL
#define P4_ESCR_EVENTMASK_SHIFT 9
-#define P4_ESCR_TAG_MASK 0x000001e0U
+#define P4_ESCR_TAG_MASK 0x000001e0ULL
#define P4_ESCR_TAG_SHIFT 5
-#define P4_ESCR_TAG_ENABLE 0x00000010U
-#define P4_ESCR_T0_OS 0x00000008U
-#define P4_ESCR_T0_USR 0x00000004U
-#define P4_ESCR_T1_OS 0x00000002U
-#define P4_ESCR_T1_USR 0x00000001U
+#define P4_ESCR_TAG_ENABLE 0x00000010ULL
+#define P4_ESCR_T0_OS 0x00000008ULL
+#define P4_ESCR_T0_USR 0x00000004ULL
+#define P4_ESCR_T1_OS 0x00000002ULL
+#define P4_ESCR_T1_USR 0x00000001ULL
#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT)
#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
-#define P4_CCCR_OVF 0x80000000U
-#define P4_CCCR_CASCADE 0x40000000U
-#define P4_CCCR_OVF_PMI_T0 0x04000000U
-#define P4_CCCR_OVF_PMI_T1 0x08000000U
-#define P4_CCCR_FORCE_OVF 0x02000000U
-#define P4_CCCR_EDGE 0x01000000U
-#define P4_CCCR_THRESHOLD_MASK 0x00f00000U
+#define P4_CCCR_OVF 0x80000000ULL
+#define P4_CCCR_CASCADE 0x40000000ULL
+#define P4_CCCR_OVF_PMI_T0 0x04000000ULL
+#define P4_CCCR_OVF_PMI_T1 0x08000000ULL
+#define P4_CCCR_FORCE_OVF 0x02000000ULL
+#define P4_CCCR_EDGE 0x01000000ULL
+#define P4_CCCR_THRESHOLD_MASK 0x00f00000ULL
#define P4_CCCR_THRESHOLD_SHIFT 20
-#define P4_CCCR_COMPLEMENT 0x00080000U
-#define P4_CCCR_COMPARE 0x00040000U
-#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U
+#define P4_CCCR_COMPLEMENT 0x00080000ULL
+#define P4_CCCR_COMPARE 0x00040000ULL
+#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000ULL
#define P4_CCCR_ESCR_SELECT_SHIFT 13
-#define P4_CCCR_ENABLE 0x00001000U
-#define P4_CCCR_THREAD_SINGLE 0x00010000U
-#define P4_CCCR_THREAD_BOTH 0x00020000U
-#define P4_CCCR_THREAD_ANY 0x00030000U
-#define P4_CCCR_RESERVED 0x00000fffU
+#define P4_CCCR_ENABLE 0x00001000ULL
+#define P4_CCCR_THREAD_SINGLE 0x00010000ULL
+#define P4_CCCR_THREAD_BOTH 0x00020000ULL
+#define P4_CCCR_THREAD_ANY 0x00030000ULL
+#define P4_CCCR_RESERVED 0x00000fffULL
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
#define P4_GEN_ESCR_EMASK(class, name, bit) \
- class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
+ class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_EMASK_BIT(class, name) class##__##name
/*
@@ -107,7 +107,7 @@
* P4_PEBS_CONFIG_MASK and related bits on
* modification.)
*/
-#define P4_CONFIG_ALIASABLE (1 << 9)
+#define P4_CONFIG_ALIASABLE (1ULL << 9)
/*
* The bits we allow to pass for RAW events
@@ -784,17 +784,17 @@ enum P4_ESCR_EMASKS {
* Note we have UOP and PEBS bits reserved for now
* just in case if we will need them once
*/
-#define P4_PEBS_CONFIG_ENABLE (1 << 7)
-#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
-#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
-#define P4_PEBS_CONFIG_MASK 0xff
+#define P4_PEBS_CONFIG_ENABLE (1ULL << 7)
+#define P4_PEBS_CONFIG_UOP_TAG (1ULL << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK 0x3FLL
+#define P4_PEBS_CONFIG_MASK 0xFFLL
/*
* mem: Only counters MSR_IQ_COUNTER4 (16) and
* MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
*/
-#define P4_PEBS_ENABLE 0x02000000U
-#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
+#define P4_PEBS_ENABLE 0x02000000ULL
+#define P4_PEBS_ENABLE_UOP_TAG 0x01000000ULL
#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 92c7e39..3486e66 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)
* So at moment let leave metrics turned on forever -- it's
* ok for now but need to be revisited!
*
- * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0);
- * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+ * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
+ * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
*/
}
@@ -910,8 +910,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* asserted again and again
*/
(void)wrmsrl_safe(hwc->config_base,
- (u64)(p4_config_unpack_cccr(hwc->config)) &
- ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
+ p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
static void p4_pmu_disable_all(void)
@@ -957,7 +956,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
u64 escr_addr, cccr;
bind = &p4_event_bind_map[idx];
- escr_addr = (u64)bind->escr_msr[thread];
+ escr_addr = bind->escr_msr[thread];
/*
* - we dont support cascaded counters yet
On Wed, Apr 24, 2013 at 09:26:30AM +0200, Ingo Molnar wrote:
>
> * Linus Torvalds <[email protected]> wrote:
>
> > And there's quite a lot of them. Even in my (fairly small) config I use on
> > my desktop. And the first warnings I see are in x86 code:
> >
> > arch/x86/kernel/traps.c:405:16: warning: implicit unsigned widening
> > cast of a '~' expression
> > arch/x86/kernel/cpu/perf_event_p4.c:912:15: warning: implicit unsigned
> > widening cast of a '~' expression
>
> Hm, the perf_event_p4.c code is indeed confused.
>
> I think the bug is real but probably benign in effect: we allow narrower
> values into the MSR register than probably intended. Only a couple of low
> bits are reserved AFAICS.
>
> Here's an (untested!) patch that tries to untangle it all: it just moves
> to clean 64-bit types everywhere - these MSRs are 64-bit wide regardless
> of whether we run on 32-bit or not.
>
> Would be nice if someone with a working P4 could test it - Cyrill? [It
> should also be double checked whether the high words are really not
> reserved and can be written to ...]
Hi Ingo! Ufortunately I don't have access to real p4 hardware,
thus I'm CC'ing Ming who has been helping a lot in testing
this code pieces.
Still the patch itself is perfectly fine to me
Reviewed-by: Cyrill Gorcunov <[email protected]>
DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
bits in the "unsigned long" data, make them long to ensure that
"&~" doesn't clear the upper bits.
do_debug() and ptrace_write_dr7() which use DR*_RESERVED look
safe, but probably it makes sense to cleanup anyway.
Reported-by: Linus Torvalds <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/include/uapi/asm/debugreg.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 3c0874d..75d07dd 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,7 @@
are either reserved or not of interest to us. */
/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED (0xFFFF0FF0)
+#define DR6_RESERVED (0xFFFF0FF0ul)
#define DR_TRAP0 (0x1) /* db0 */
#define DR_TRAP1 (0x2) /* db1 */
@@ -65,7 +65,7 @@
gdt or the ldt if we want to. I am not sure why this is an advantage */
#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_CONTROL_RESERVED (0xFC00ul) /* Reserved by Intel */
#else
#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
#endif
--
1.5.5.1
On 04/24/2013 10:07 AM, Oleg Nesterov wrote:
> DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
> bits in the "unsigned long" data, make them long to ensure that
> "&~" doesn't clear the upper bits.
>
> do_debug() and ptrace_write_dr7() which use DR*_RESERVED look
> safe, but probably it makes sense to cleanup anyway.
>
> Reported-by: Linus Torvalds <[email protected]>
> Signed-off-by: Oleg Nesterov <[email protected]>
Can you please put in the description if this is a manifest or
non-manifest bug and, if manifest, what the issues are? It greatly
affects what we otherwise have to do to address the bug.
Also, the -UL suffix is usually capitalized in our codebase.
-hpa
2013/4/24 Oleg Nesterov <[email protected]>:
> DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
> bits in the "unsigned long" data, make them long to ensure that
> "&~" doesn't clear the upper bits.
>
> do_debug() and ptrace_write_dr7() which use DR*_RESERVED look
> safe, but probably it makes sense to cleanup anyway.
Agreed. The code looks safe, but the pattern is error prone. I'm all
for that cleanup.
Just something below:
>
> Reported-by: Linus Torvalds <[email protected]>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> arch/x86/include/uapi/asm/debugreg.h | 4 ++--
> 1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
> index 3c0874d..75d07dd 100644
> --- a/arch/x86/include/uapi/asm/debugreg.h
> +++ b/arch/x86/include/uapi/asm/debugreg.h
> @@ -15,7 +15,7 @@
> are either reserved or not of interest to us. */
>
> /* Define reserved bits in DR6 which are always set to 1 */
> -#define DR6_RESERVED (0xFFFF0FF0)
> +#define DR6_RESERVED (0xFFFF0FF0ul)
You told in an earlier email that intel manual says upper 32 bits of
dr6 are reserved.
In this case don't we need to expand the mask in 64 bits like is done
for DR_CONTROL_RESERVED?
Thanks.
On 04/24/2013 03:48 PM, Frederic Weisbecker wrote:
> 2013/4/24 Oleg Nesterov <[email protected]>:
>> DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
>> bits in the "unsigned long" data, make them long to ensure that
>> "&~" doesn't clear the upper bits.
>>
>> do_debug() and ptrace_write_dr7() which use DR*_RESERVED look
>> safe, but probably it makes sense to cleanup anyway.
>
> Agreed. The code looks safe, but the pattern is error prone. I'm all
> for that cleanup.
> Just something below:
>
>>
>> Reported-by: Linus Torvalds <[email protected]>
>> Signed-off-by: Oleg Nesterov <[email protected]>
>> ---
>> arch/x86/include/uapi/asm/debugreg.h | 4 ++--
>> 1 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
>> index 3c0874d..75d07dd 100644
>> --- a/arch/x86/include/uapi/asm/debugreg.h
>> +++ b/arch/x86/include/uapi/asm/debugreg.h
>> @@ -15,7 +15,7 @@
>> are either reserved or not of interest to us. */
>>
>> /* Define reserved bits in DR6 which are always set to 1 */
>> -#define DR6_RESERVED (0xFFFF0FF0)
>> +#define DR6_RESERVED (0xFFFF0FF0ul)
>
> You told in an earlier email that intel manual says upper 32 bits of
> dr6 are reserved.
> In this case don't we need to expand the mask in 64 bits like is done
> for DR_CONTROL_RESERVED?
>
Arguably this would be a *good* use for ~ ...
Instead of defining separate bitmasks for 32 and 64 bits have the
reciprocal (non-reserved bits):
#define DR6_RESERVED (~0x0000F00FUL)
That does have the right value on both 32 and 64 bits. The leading
zeroes aren't even really needed.
Now, DR6 is a bit special in that a bunch of the reserved bits are
hardwired to 1, not 0; I don't know offhand if that is true for bits
[63:32].
-hpa
2013/4/25 H. Peter Anvin <[email protected]>:
> On 04/24/2013 03:48 PM, Frederic Weisbecker wrote:
>> You told in an earlier email that intel manual says upper 32 bits of
>> dr6 are reserved.
>> In this case don't we need to expand the mask in 64 bits like is done
>> for DR_CONTROL_RESERVED?
>>
>
> Arguably this would be a *good* use for ~ ...
>
> Instead of defining separate bitmasks for 32 and 64 bits have the
> reciprocal (non-reserved bits):
>
> #define DR6_RESERVED (~0x0000F00FUL)
>
> That does have the right value on both 32 and 64 bits. The leading
> zeroes aren't even really needed.
Ah, looks better indeed.
>
> Now, DR6 is a bit special in that a bunch of the reserved bits are
> hardwired to 1, not 0; I don't know offhand if that is true for bits
> [63:32].
Hmm, good point, could it be a problem given that we clear the
reserved dr6 bits on do_trap() and write that 'cleaned up" value back
to "tsk->thread.debugreg6"? Probably not if those hardwired reserved
bits are set to "1" on dr6 physical write whether those bits are
cleared or not in their storage in thread struct before resuming the
task?
On Wed, Apr 24, 2013 at 3:47 PM, Cyrill Gorcunov <[email protected]> wrote:
> On Wed, Apr 24, 2013 at 09:26:30AM +0200, Ingo Molnar wrote:
>>
>> * Linus Torvalds <[email protected]> wrote:
>>
>> > And there's quite a lot of them. Even in my (fairly small) config I use on
>> > my desktop. And the first warnings I see are in x86 code:
>> >
>> > arch/x86/kernel/traps.c:405:16: warning: implicit unsigned widening
>> > cast of a '~' expression
>> > arch/x86/kernel/cpu/perf_event_p4.c:912:15: warning: implicit unsigned
>> > widening cast of a '~' expression
>>
>> Hm, the perf_event_p4.c code is indeed confused.
>>
>> I think the bug is real but probably benign in effect: we allow narrower
>> values into the MSR register than probably intended. Only a couple of low
>> bits are reserved AFAICS.
>>
>> Here's an (untested!) patch that tries to untangle it all: it just moves
>> to clean 64-bit types everywhere - these MSRs are 64-bit wide regardless
>> of whether we run on 32-bit or not.
>>
>> Would be nice if someone with a working P4 could test it - Cyrill? [It
>> should also be double checked whether the high words are really not
>> reserved and can be written to ...]
>
> Hi Ingo! Ufortunately I don't have access to real p4 hardware,
> thus I'm CC'ing Ming who has been helping a lot in testing
> this code pieces.
Hi,
Sorry, but I don't have access to p4 hardware either.
Lin Ming
>
> Still the patch itself is perfectly fine to me
>
> Reviewed-by: Cyrill Gorcunov <[email protected]>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On 04/24/2013 04:31 PM, Frederic Weisbecker wrote:
>>
>> Now, DR6 is a bit special in that a bunch of the reserved bits are
>> hardwired to 1, not 0; I don't know offhand if that is true for bits
>> [63:32].
>
> Hmm, good point, could it be a problem given that we clear the
> reserved dr6 bits on do_trap() and write that 'cleaned up" value back
> to "tsk->thread.debugreg6"? Probably not if those hardwired reserved
> bits are set to "1" on dr6 physical write whether those bits are
> cleared or not in their storage in thread struct before resuming the
> task?
>
OK, the SDM states that DR6[63:32] are reserved and must be written as
zero (not one).
So the quiescent 64-bit value of DR6 is 0x0000_0000_FFFF_0FF0.
-hpa
On 04/24, H. Peter Anvin wrote:
>
> On 04/24/2013 10:07 AM, Oleg Nesterov wrote:
> > DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
> > bits in the "unsigned long" data, make them long to ensure that
> > "&~" doesn't clear the upper bits.
> >
> > do_debug() and ptrace_write_dr7() which use DR*_RESERVED look
> > safe, but probably it makes sense to cleanup anyway.
> >
> > Reported-by: Linus Torvalds <[email protected]>
> > Signed-off-by: Oleg Nesterov <[email protected]>
>
> Can you please put in the description if this is a manifest or
> non-manifest bug and, if manifest, what the issues are? It greatly
> affects what we otherwise have to do to address the bug.
Sorry if it was not clear, I tried to explain in the changelog that
this is only cleanup. Yes, dr6 should have all zeroes in 32-64 so
the usage of DR6_RESERVED is safe.
> Also, the -UL suffix is usually capitalized in our codebase.
OK, iiuc otherwise you agree with this change. I'll send v2.
Oleg.
Commit-ID: 5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886
Gitweb: http://git.kernel.org/tip/5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886
Author: Ingo Molnar <[email protected]>
AuthorDate: Wed, 24 Apr 2013 09:26:30 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 26 Apr 2013 09:31:41 +0200
perf/x86/intel/P4: Robistify P4 PMU types
Linus found, while extending integer type extension checks in the
sparse static code checker, various fragile patterns of mixed
signed/unsigned 64-bit/32-bit integer use in perf_events_p4.c.
The relevant hardware register ABI is 64 bit wide on 32-bit
kernels as well, so clean it all up a bit, remove unnecessary
casts, and make sure we use 64-bit unsigned integers in these
places.
[ Unfortunately this patch was not tested on real P4 hardware,
those are pretty rare already. If this patch causes any
problems on P4 hardware then please holler ... ]
Reported-by: Linus Torvalds <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: David Miller <[email protected]>
Cc: Theodore Ts'o <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/perf_event_p4.h | 62 ++++++++++++++++++------------------
arch/x86/kernel/cpu/perf_event_p4.c | 9 +++---
2 files changed, 35 insertions(+), 36 deletions(-)
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 4f7e67e..85e13cc 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -24,45 +24,45 @@
#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
-#define P4_ESCR_EVENT_MASK 0x7e000000U
+#define P4_ESCR_EVENT_MASK 0x7e000000ULL
#define P4_ESCR_EVENT_SHIFT 25
-#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
+#define P4_ESCR_EVENTMASK_MASK 0x01fffe00ULL
#define P4_ESCR_EVENTMASK_SHIFT 9
-#define P4_ESCR_TAG_MASK 0x000001e0U
+#define P4_ESCR_TAG_MASK 0x000001e0ULL
#define P4_ESCR_TAG_SHIFT 5
-#define P4_ESCR_TAG_ENABLE 0x00000010U
-#define P4_ESCR_T0_OS 0x00000008U
-#define P4_ESCR_T0_USR 0x00000004U
-#define P4_ESCR_T1_OS 0x00000002U
-#define P4_ESCR_T1_USR 0x00000001U
+#define P4_ESCR_TAG_ENABLE 0x00000010ULL
+#define P4_ESCR_T0_OS 0x00000008ULL
+#define P4_ESCR_T0_USR 0x00000004ULL
+#define P4_ESCR_T1_OS 0x00000002ULL
+#define P4_ESCR_T1_USR 0x00000001ULL
#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT)
#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
-#define P4_CCCR_OVF 0x80000000U
-#define P4_CCCR_CASCADE 0x40000000U
-#define P4_CCCR_OVF_PMI_T0 0x04000000U
-#define P4_CCCR_OVF_PMI_T1 0x08000000U
-#define P4_CCCR_FORCE_OVF 0x02000000U
-#define P4_CCCR_EDGE 0x01000000U
-#define P4_CCCR_THRESHOLD_MASK 0x00f00000U
+#define P4_CCCR_OVF 0x80000000ULL
+#define P4_CCCR_CASCADE 0x40000000ULL
+#define P4_CCCR_OVF_PMI_T0 0x04000000ULL
+#define P4_CCCR_OVF_PMI_T1 0x08000000ULL
+#define P4_CCCR_FORCE_OVF 0x02000000ULL
+#define P4_CCCR_EDGE 0x01000000ULL
+#define P4_CCCR_THRESHOLD_MASK 0x00f00000ULL
#define P4_CCCR_THRESHOLD_SHIFT 20
-#define P4_CCCR_COMPLEMENT 0x00080000U
-#define P4_CCCR_COMPARE 0x00040000U
-#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U
+#define P4_CCCR_COMPLEMENT 0x00080000ULL
+#define P4_CCCR_COMPARE 0x00040000ULL
+#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000ULL
#define P4_CCCR_ESCR_SELECT_SHIFT 13
-#define P4_CCCR_ENABLE 0x00001000U
-#define P4_CCCR_THREAD_SINGLE 0x00010000U
-#define P4_CCCR_THREAD_BOTH 0x00020000U
-#define P4_CCCR_THREAD_ANY 0x00030000U
-#define P4_CCCR_RESERVED 0x00000fffU
+#define P4_CCCR_ENABLE 0x00001000ULL
+#define P4_CCCR_THREAD_SINGLE 0x00010000ULL
+#define P4_CCCR_THREAD_BOTH 0x00020000ULL
+#define P4_CCCR_THREAD_ANY 0x00030000ULL
+#define P4_CCCR_RESERVED 0x00000fffULL
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
#define P4_GEN_ESCR_EMASK(class, name, bit) \
- class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
+ class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_EMASK_BIT(class, name) class##__##name
/*
@@ -107,7 +107,7 @@
* P4_PEBS_CONFIG_MASK and related bits on
* modification.)
*/
-#define P4_CONFIG_ALIASABLE (1 << 9)
+#define P4_CONFIG_ALIASABLE (1ULL << 9)
/*
* The bits we allow to pass for RAW events
@@ -784,17 +784,17 @@ enum P4_ESCR_EMASKS {
* Note we have UOP and PEBS bits reserved for now
* just in case if we will need them once
*/
-#define P4_PEBS_CONFIG_ENABLE (1 << 7)
-#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
-#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
-#define P4_PEBS_CONFIG_MASK 0xff
+#define P4_PEBS_CONFIG_ENABLE (1ULL << 7)
+#define P4_PEBS_CONFIG_UOP_TAG (1ULL << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK 0x3FLL
+#define P4_PEBS_CONFIG_MASK 0xFFLL
/*
* mem: Only counters MSR_IQ_COUNTER4 (16) and
* MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
*/
-#define P4_PEBS_ENABLE 0x02000000U
-#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
+#define P4_PEBS_ENABLE 0x02000000ULL
+#define P4_PEBS_ENABLE_UOP_TAG 0x01000000ULL
#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 92c7e39..3486e66 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)
* So at moment let leave metrics turned on forever -- it's
* ok for now but need to be revisited!
*
- * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0);
- * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+ * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
+ * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
*/
}
@@ -910,8 +910,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* asserted again and again
*/
(void)wrmsrl_safe(hwc->config_base,
- (u64)(p4_config_unpack_cccr(hwc->config)) &
- ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
+ p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
static void p4_pmu_disable_all(void)
@@ -957,7 +956,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
u64 escr_addr, cccr;
bind = &p4_event_bind_map[idx];
- escr_addr = (u64)bind->escr_msr[thread];
+ escr_addr = bind->escr_msr[thread];
/*
* - we dont support cascaded counters yet
On Fri, Apr 26, 2013 at 07:20:46AM -0700, tip-bot for Ingo Molnar wrote:
> Commit-ID: 5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886
> Gitweb: http://git.kernel.org/tip/5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886
> Author: Ingo Molnar <[email protected]>
> AuthorDate: Wed, 24 Apr 2013 09:26:30 +0200
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Fri, 26 Apr 2013 09:31:41 +0200
>
> perf/x86/intel/P4: Robistify P4 PMU types
>
> Linus found, while extending integer type extension checks in the
> sparse static code checker, various fragile patterns of mixed
> signed/unsigned 64-bit/32-bit integer use in perf_events_p4.c.
>
> The relevant hardware register ABI is 64 bit wide on 32-bit
> kernels as well, so clean it all up a bit, remove unnecessary
> casts, and make sure we use 64-bit unsigned integers in these
> places.
>
> [ Unfortunately this patch was not tested on real P4 hardware,
> those are pretty rare already. If this patch causes any
> problems on P4 hardware then please holler ... ]
I have a P4 and I turn it on from time to time when it's cold in the
room :-).
Any particular tests you want me to run on it?
processor : 0
vendor_id : GenuineIntel
cpu family : 15
model : 2
model name : Intel(R) Pentium(R) 4 CPU 2.60GHz
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
On Fri, Apr 26, 2013 at 06:13:28PM +0200, Borislav Petkov wrote:
>
> I have a P4 and I turn it on from time to time when it's cold in the
> room :-).
>
> Any particular tests you want me to run on it?
>
> processor : 0
> vendor_id : GenuineIntel
> cpu family : 15
> model : 2
> model name : Intel(R) Pentium(R) 4 CPU 2.60GHz
Cool! Easiest way I think is to run "perf top" (iirc that's the
command for top-like output). And if possible some branch instructions
testing.
On Fri, Apr 26, 2013 at 08:24:44PM +0400, Cyrill Gorcunov wrote:
> And if possible some branch instructions testing.
By that you mean, see whether it counts branches?
./perf stat sleep 0
Performance counter stats for 'sleep 0':
2.483869 task-clock # 0.463 CPUs utilized
1 context-switches # 0.403 K/sec
0 cpu-migrations # 0.000 K/sec
168 page-faults # 0.068 M/sec
3,435,630 cycles # 1.383 GHz
736,680 stalled-cycles-frontend # 21.44% frontend cycles idle
656,603 stalled-cycles-backend # 19.11% backend cycles idle
2,932,775 instructions # 0.85 insns per cycle
# 0.25 stalled cycles per insn
590,855 branches # 237.877 M/sec
12,882 branch-misses # 2.18% of all branches
^^^^^^^^^^^^^^^^^^^^
Those above?
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
bits in the "unsigned long" data, make them long to ensure that
"&~" doesn't clear the upper bits.
This is only cleanup, the usage of ~DR*_RESERVED is safe but
doesn't look clean and the pattern is error prone.
- do_debug:
dr6 &= ~DR6_RESERVED;
this also wrongly clears 32-63 bits. Fortunately these
bits are reserved and must be zero.
- ptrace_write_dr7:
data &= ~DR_CONTROL_RESERVED;
on __i386__ this mixes long/int but sizeof should be the
same.
Reported-by: Linus Torvalds <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/include/uapi/asm/debugreg.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 3c0874d..c0c1b89 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,7 @@
are either reserved or not of interest to us. */
/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED (0xFFFF0FF0)
+#define DR6_RESERVED (0xFFFF0FF0UL)
#define DR_TRAP0 (0x1) /* db0 */
#define DR_TRAP1 (0x2) /* db1 */
@@ -65,7 +65,7 @@
gdt or the ldt if we want to. I am not sure why this is an advantage */
#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_CONTROL_RESERVED (0xFC00UL) /* Reserved by Intel */
#else
#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
#endif
--
1.5.5.1
On 04/26/2013 09:38 AM, Oleg Nesterov wrote:
> DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the set
> bits in the "unsigned long" data, make them long to ensure that
> "&~" doesn't clear the upper bits.
>
> This is only cleanup, the usage of ~DR*_RESERVED is safe but
> doesn't look clean and the pattern is error prone.
>
> - do_debug:
>
> dr6 &= ~DR6_RESERVED;
>
> this also wrongly clears 32-63 bits. Fortunately these
> bits are reserved and must be zero.
>
I don't think this is wrongly at all. The whole point is to mask out
the bits that the handler doesn't want to deal with, so masking out the
reserved bits [63:32] seems reasonable to me.
The comment should probably be corrected, though.
-hpa
On Fri, Apr 26, 2013 at 06:39:52PM +0200, Borislav Petkov wrote:
> # 0.25 stalled cycles per insn
> 590,855 branches # 237.877 M/sec
> 12,882 branch-misses # 2.18% of all branches
> ^^^^^^^^^^^^^^^^^^^^
>
> Those above?
Yeah, thanks!
On 04/26, H. Peter Anvin wrote:
>
> On 04/26/2013 09:38 AM, Oleg Nesterov wrote:
> >
> > - do_debug:
> >
> > dr6 &= ~DR6_RESERVED;
> >
> > this also wrongly clears 32-63 bits. Fortunately these
> > bits are reserved and must be zero.
>
> I don't think this is wrongly at all.
OK, I meant that it also clears the bits that are not specified in
DR6_RESERVED mask.
> The whole point is to mask out
> the bits that the handler doesn't want to deal with, so masking out the
> reserved bits [63:32] seems reasonable to me.
Then we should do
- #define DR6_RESERVED 0xFFFF0FF0
+ #define DR6_RESERVED 0xFFFFFFFFFFFF0FF0
?
or what? (just in case, I will happily agree with "do nothing" ;)
> The comment should probably be corrected, though.
Which one?
/* Define reserved bits in DR6 which are always set to 1 */
#define DR6_RESERVED (0xFFFF0FF0UL)
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
I guess both should be updated then. But if I read the doc correctly
the lower reserved bits are set to 1.
However do_debug() does set_debugreg(0, 6) and this looks correct, the
doc says "debug handlers should clear the register before returning to
the interrupted task".
Oleg.
On Fri, Apr 26, 2013 at 08:46:52PM +0400, Cyrill Gorcunov wrote:
> On Fri, Apr 26, 2013 at 06:39:52PM +0200, Borislav Petkov wrote:
> > # 0.25 stalled cycles per insn
> > 590,855 branches # 237.877 M/sec
> > 12,882 branch-misses # 2.18% of all branches
> > ^^^^^^^^^^^^^^^^^^^^
> >
> > Those above?
Well, perf top looks ok to me, here's a snapshot:
PerfTop: 63 irqs/sec kernel:79.4% exact: 0.0% [4000Hz cycles], (all, 2 CPUs)
--------------------------------------------------------------------------------------------------------------------------------------------
11.21% [kernel] [k] __lock_acquire
7.87% libc-2.13.so [.] 0x00078b0c
5.78% libz.so.1.2.7 [.] 0x00003731
4.29% libpthread-2.13.so [.] pthread_rwlock_unlock
3.74% libpthread-2.13.so [.] pthread_rwlock_rdlock
3.67% [kernel] [k] lock_release
2.55% [kernel] [k] lock_acquire
2.27% perf [.] symbols__insert
2.15% sshd [.] 0x0004707e
1.62% libc-2.13.so [.] vfprintf
1.58% [kernel] [k] mark_held_locks
1.40% [kernel] [k] do_raw_spin_lock
1.37% [kernel] [k] trace_hardirqs_on_caller
1.29% [kernel] [k] sub_preempt_count
1.17% perf [.] symbol_filter
1.13% [kernel] [k] mark_lock
1.05% [kernel] [k] trace_hardirqs_off_caller
0.96% perf [.] rb_next
0.94% libc-2.13.so [.] memchr
0.80% libbfd-2.22-system.so [.] 0x000bb009
0.72% [kernel] [k] __schedule
0.71% [kernel] [k] ioread16
0.67% [kernel] [k] _raw_spin_unlock_irqrestore
0.66% [kernel] [k] __switch_to
0.59% [kernel] [k] do_raw_spin_unlock
0.56% perf [.] dso__load_sym
...
I can annotate symbols and disassemble works fine too, along with
refresh and per-insn overhead.
The other trivial test passes too, although branch-misses doesn't get
counted:
./perf stat sleep 1
Performance counter stats for 'sleep 1':
1.433368 task-clock # 0.001 CPUs utilized
1 context-switches # 0.698 K/sec
0 cpu-migrations # 0.000 K/sec
147 page-faults # 0.103 M/sec
78,446 cycles # 0.055 GHz
0 stalled-cycles-frontend # 0.00% frontend cycles idle
0 stalled-cycles-backend # 0.00% backend cycles idle [27.37%]
1,268,044 instructions # 16.16 insns per cycle [27.37%]
223,742 branches # 156.095 M/sec [27.37%]
<not counted> branch-misses
1.002191045 seconds time elapsed
However, if I do this, it works:
./perf stat -e branch-misses sleep 1
Performance counter stats for 'sleep 1':
8,583 branch-misses
1.001992384 seconds time elapsed
Oh, btw, tip/master has
commit 697dfd884438058b15032b0169887c742704434a
Merge: 0fbd06761f5c f697036b93aa
Author: H. Peter Anvin <[email protected]>
Date: Thu Apr 25 14:00:22 2013 -0700
Merge tag 'efi-urgent' into x86/urgent
as its top commit.
HTH.
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
I don't know why this is uapi... finest make a lot of sense to me.
Oleg Nesterov <[email protected]> wrote:
>On 04/26, Oleg Nesterov wrote:
>
>> On 04/26, H. Peter Anvin wrote:
>> >
>> > On 04/26/2013 09:38 AM, Oleg Nesterov wrote:
>> > >
>> > > - do_debug:
>> > >
>> > > dr6 &= ~DR6_RESERVED;
>> > >
>> > > this also wrongly clears 32-63 bits. Fortunately these
>> > > bits are reserved and must be zero.
>> >
>> > I don't think this is wrongly at all.
>>
>> OK, I meant that it also clears the bits that are not specified in
>> DR6_RESERVED mask.
>>
>> > The whole point is to mask out
>> > the bits that the handler doesn't want to deal with, so masking out
>the
>> > reserved bits [63:32] seems reasonable to me.
>>
>> Then we should do
>>
>> - #define DR6_RESERVED 0xFFFF0FF0
>> + #define DR6_RESERVED 0xFFFFFFFFFFFF0FF0
>>
>> ?
>>
>> or what? (just in case, I will happily agree with "do nothing" ;)
>
>Or we can do the s/reserved/mask/ change and avoid any "unexpected"
>effect of "long &= ~int". This allso allows to kill ifdef(__i386__).
>
>But this is include/uapi, I do not know if I can simply remove the
>old define's.
>
>In short: whatever you prefer, including "leave it alone".
>
>Oleg.
>
>diff --git a/arch/x86/include/uapi/asm/debugreg.h
>b/arch/x86/include/uapi/asm/debugreg.h
>index 3c0874d..2678b23 100644
>--- a/arch/x86/include/uapi/asm/debugreg.h
>+++ b/arch/x86/include/uapi/asm/debugreg.h
>@@ -14,8 +14,7 @@
> which debugging register was responsible for the trap. The other bits
> are either reserved or not of interest to us. */
>
>-/* Define reserved bits in DR6 which are always set to 1 */
>-#define DR6_RESERVED (0xFFFF0FF0)
>+#define DR6_MASK (0xF00FU) /* Everything else is reserved */
>
> #define DR_TRAP0 (0x1) /* db0 */
> #define DR_TRAP1 (0x2) /* db1 */
>@@ -32,6 +31,8 @@
> and indicates what types of access we trap on, and how large the data
> field is that we are looking at */
>
>+#define DR_CONTROL_MASK (0xFFFF03FFU) /* Everything else is reserved
>*/
>+
> #define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
> #define DR_CONTROL_SIZE 4 /* 4 control bits per register */
>
>@@ -64,12 +65,6 @@
> We can slow the instruction pipeline for instructions coming via the
>gdt or the ldt if we want to. I am not sure why this is an advantage
>*/
>
>-#ifdef __i386__
>-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
>-#else
>-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
>-#endif
>-
> #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
> #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
>
>diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
>index 7461f50..bc5fb98 100644
>--- a/arch/x86/kernel/ptrace.c
>+++ b/arch/x86/kernel/ptrace.c
>@@ -657,7 +657,7 @@ static int ptrace_write_dr7(struct task_struct
>*tsk, unsigned long data)
> bool second_pass = false;
> int i, rc, ret = 0;
>
>- data &= ~DR_CONTROL_RESERVED;
>+ data &= DR_CONTROL_MASK;
> old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
>
> restore:
>diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
>index 68bda7a..42a635f 100644
>--- a/arch/x86/kernel/traps.c
>+++ b/arch/x86/kernel/traps.c
>@@ -402,7 +402,7 @@ dotraplinkage void __kprobes do_debug(struct
>pt_regs *regs, long error_code)
> get_debugreg(dr6, 6);
>
> /* Filter out all the reserved bits which are preset to 1 */
>- dr6 &= ~DR6_RESERVED;
>+ dr6 &= DR6_MASK;
>
> /*
> * If dr6 has no reason to give us about the origin of this trap,
--
Sent from my mobile phone. Please excuse brevity and lack of formatting.
On 04/26, Oleg Nesterov wrote:
> On 04/26, H. Peter Anvin wrote:
> >
> > On 04/26/2013 09:38 AM, Oleg Nesterov wrote:
> > >
> > > - do_debug:
> > >
> > > dr6 &= ~DR6_RESERVED;
> > >
> > > this also wrongly clears 32-63 bits. Fortunately these
> > > bits are reserved and must be zero.
> >
> > I don't think this is wrongly at all.
>
> OK, I meant that it also clears the bits that are not specified in
> DR6_RESERVED mask.
>
> > The whole point is to mask out
> > the bits that the handler doesn't want to deal with, so masking out the
> > reserved bits [63:32] seems reasonable to me.
>
> Then we should do
>
> - #define DR6_RESERVED 0xFFFF0FF0
> + #define DR6_RESERVED 0xFFFFFFFFFFFF0FF0
>
> ?
>
> or what? (just in case, I will happily agree with "do nothing" ;)
Or we can do the s/reserved/mask/ change and avoid any "unexpected"
effect of "long &= ~int". This allso allows to kill ifdef(__i386__).
But this is include/uapi, I do not know if I can simply remove the
old define's.
In short: whatever you prefer, including "leave it alone".
Oleg.
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 3c0874d..2678b23 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -14,8 +14,7 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED (0xFFFF0FF0)
+#define DR6_MASK (0xF00FU) /* Everything else is reserved */
#define DR_TRAP0 (0x1) /* db0 */
#define DR_TRAP1 (0x2) /* db1 */
@@ -32,6 +31,8 @@
and indicates what types of access we trap on, and how large the data
field is that we are looking at */
+#define DR_CONTROL_MASK (0xFFFF03FFU) /* Everything else is reserved */
+
#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
@@ -64,12 +65,6 @@
We can slow the instruction pipeline for instructions coming via the
gdt or the ldt if we want to. I am not sure why this is an advantage */
-#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
-#else
-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
-#endif
-
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7461f50..bc5fb98 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -657,7 +657,7 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
bool second_pass = false;
int i, rc, ret = 0;
- data &= ~DR_CONTROL_RESERVED;
+ data &= DR_CONTROL_MASK;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
restore:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 68bda7a..42a635f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -402,7 +402,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
get_debugreg(dr6, 6);
/* Filter out all the reserved bits which are preset to 1 */
- dr6 &= ~DR6_RESERVED;
+ dr6 &= DR6_MASK;
/*
* If dr6 has no reason to give us about the origin of this trap,
On Sat, Apr 27, 2013 at 06:14:07PM +0200, Borislav Petkov wrote:
...
Thanks a LOT, Borislav!
> The other trivial test passes too, although branch-misses doesn't get
> counted:
>
> ./perf stat sleep 1
>
> Performance counter stats for 'sleep 1':
>
> 1.433368 task-clock # 0.001 CPUs utilized
> 1 context-switches # 0.698 K/sec
> 0 cpu-migrations # 0.000 K/sec
> 147 page-faults # 0.103 M/sec
> 78,446 cycles # 0.055 GHz
> 0 stalled-cycles-frontend # 0.00% frontend cycles idle
> 0 stalled-cycles-backend # 0.00% backend cycles idle [27.37%]
> 1,268,044 instructions # 16.16 insns per cycle [27.37%]
> 223,742 branches # 156.095 M/sec [27.37%]
> <not counted> branch-misses
>
> 1.002191045 seconds time elapsed
It's known effect. instructions retired and branch-misses can't
run simultaneously since they both use same escr msr register
(if I recall correctly, out of repo at moment).
> However, if I do this, it works:
>
> ./perf stat -e branch-misses sleep 1
>
> Performance counter stats for 'sleep 1':
>
> 8,583 branch-misses
>
> 1.001992384 seconds time elapsed
Thanks!
On Sat, Apr 27, 2013 at 04:45:37PM +0200, Oleg Nesterov wrote:
> On 04/26, Oleg Nesterov wrote:
>
> > On 04/26, H. Peter Anvin wrote:
> > >
> > > On 04/26/2013 09:38 AM, Oleg Nesterov wrote:
> > > >
> > > > - do_debug:
> > > >
> > > > dr6 &= ~DR6_RESERVED;
> > > >
> > > > this also wrongly clears 32-63 bits. Fortunately these
> > > > bits are reserved and must be zero.
> > >
> > > I don't think this is wrongly at all.
> >
> > OK, I meant that it also clears the bits that are not specified in
> > DR6_RESERVED mask.
> >
> > > The whole point is to mask out
> > > the bits that the handler doesn't want to deal with, so masking out the
> > > reserved bits [63:32] seems reasonable to me.
> >
> > Then we should do
> >
> > - #define DR6_RESERVED 0xFFFF0FF0
> > + #define DR6_RESERVED 0xFFFFFFFFFFFF0FF0
> >
> > ?
> >
> > or what? (just in case, I will happily agree with "do nothing" ;)
>
> Or we can do the s/reserved/mask/ change and avoid any "unexpected"
> effect of "long &= ~int". This allso allows to kill ifdef(__i386__).
>
> But this is include/uapi, I do not know if I can simply remove the
> old define's.
>
> In short: whatever you prefer, including "leave it alone".
>
> Oleg.
>
> diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
> index 3c0874d..2678b23 100644
> --- a/arch/x86/include/uapi/asm/debugreg.h
> +++ b/arch/x86/include/uapi/asm/debugreg.h
> @@ -14,8 +14,7 @@
> which debugging register was responsible for the trap. The other bits
> are either reserved or not of interest to us. */
>
> -/* Define reserved bits in DR6 which are always set to 1 */
> -#define DR6_RESERVED (0xFFFF0FF0)
> +#define DR6_MASK (0xF00FU) /* Everything else is reserved */
I'm personally fine either with that or with Peter's suggestion to do:
-#define DR6_RESERVED (0xFFFF0FF0)
+#define DR6_RESERVED (~0xF00FUL)
If this should stay stable UAPI, we probably want Peter's solution. Otherwise
I really don't mind.
Thanks.
On 04/28, Frederic Weisbecker wrote:
>
> On Sat, Apr 27, 2013 at 04:45:37PM +0200, Oleg Nesterov wrote:
> >
> > -/* Define reserved bits in DR6 which are always set to 1 */
> > -#define DR6_RESERVED (0xFFFF0FF0)
> > +#define DR6_MASK (0xF00FU) /* Everything else is reserved */
>
> I'm personally fine either with that or with Peter's suggestion to do:
>
> -#define DR6_RESERVED (0xFFFF0FF0)
> +#define DR6_RESERVED (~0xF00FUL)
I missed this suggestion...
Yes, and this allows to kill ifdef too.
> If this should stay stable UAPI,
I do not know, but I guess it would be safer to keep the old define's.
> I really don't mind.
Oh, I do not mind too ;)
OK, please see v3.
------------------------------------------------------------------------------
Subject: [PATCH v3] x86: make DR*_RESERVED unsigned long
DR6_RESERVED and DR_CONTROL_RESERVED are used to clear the unwanted
bits in the "unsigned long" data, but "ulong &= ~int" also clears the
upper bits that are not specified in mask.
This is actually fine, dr6[32:63] are reserved, but this is not clear
so it would be better to make them "unsigned long" to cleanup the code.
However, depending on sizeof(long), DR6_RESERVED should be either
0xFFFF0FF0 or 0xFFFFFFFF_FFFF0FF0, so this patch redefines them as
(~ 32_bit_mask UL) to avoid ifdef's.
Reported-by: Linus Torvalds <[email protected]>
Suggested-by: H. Peter Anvin <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/include/uapi/asm/debugreg.h | 8 ++------
1 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 3c0874d..4ff5d05 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,7 @@
are either reserved or not of interest to us. */
/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED (0xFFFF0FF0)
+#define DR6_RESERVED (~0xF00FUL)
#define DR_TRAP0 (0x1) /* db0 */
#define DR_TRAP1 (0x2) /* db1 */
@@ -64,11 +64,7 @@
We can slow the instruction pipeline for instructions coming via the
gdt or the ldt if we want to. I am not sure why this is an advantage */
-#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
-#else
-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
-#endif
+#define DR_CONTROL_RESERVED (~0xFFFF03FFUL) /* Reserved by Intel */
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
--
1.5.5.1
On 04/28/2013 10:27 AM, Oleg Nesterov wrote:
> On 04/28, Frederic Weisbecker wrote:
>>
>> On Sat, Apr 27, 2013 at 04:45:37PM +0200, Oleg Nesterov wrote:
>>>
>>> -/* Define reserved bits in DR6 which are always set to 1 */
>>> -#define DR6_RESERVED (0xFFFF0FF0)
>>> +#define DR6_MASK (0xF00FU) /* Everything else is reserved */
>>
>> I'm personally fine either with that or with Peter's suggestion to do:
>>
>> -#define DR6_RESERVED (0xFFFF0FF0)
>> +#define DR6_RESERVED (~0xF00FUL)
>
> I missed this suggestion...
>
> Yes, and this allows to kill ifdef too.
>
>> If this should stay stable UAPI,
>
> I do not know, but I guess it would be safer to keep the old define's.
>
>> I really don't mind.
>
> Oh, I do not mind too ;)
>
> OK, please see v3.
>
Looks good. However, given the timing, I would think this is 3.11
material unless we have a manifest bug at this point.
I have several bits like this that I'm going to queue up in a separate
tip branch.
-hpa
On 04/28, H. Peter Anvin wrote:
>
> On 04/28/2013 10:27 AM, Oleg Nesterov wrote:
>
> Looks good. However, given the timing, I would think this is 3.11
> material unless we have a manifest bug at this point.
Yes, yes, this is only cleanup.
> I have several bits like this that I'm going to queue up in a separate
> tip branch.
Great, thanks.
Oleg.
On 04/28/2013 10:39 AM, Oleg Nesterov wrote:
> On 04/28, H. Peter Anvin wrote:
>>
>> On 04/28/2013 10:27 AM, Oleg Nesterov wrote:
>>
>> Looks good. However, given the timing, I would think this is 3.11
>> material unless we have a manifest bug at this point.
>
> Yes, yes, this is only cleanup.
>
Thanks for tackling it. I just wanted to make sure you didn't wonder
why I didn't want to push it for 3.10.
-hpa