Subject: x86: A fast way to check capabilities of the current cpu


Subject: x86: A fast way to check capabilities of the current cpu

Add this_cpu_has() which determines if the current cpu has a certain
ability using a segment prefix and a bit test operation.

For that we need to add bit operations to x86's percpu.h.

Many uses of cpu_has use a pointer passed to a function to determine
the current flags. That is no longer necessary after this patch.

However, this patch only converts the straightforward cases where
cpu_has is used with this_cpu_ptr. The rest is work for later.

Signed-off-by: Christoph Lameter <[email protected]>

---
arch/x86/include/asm/cpufeature.h | 13 +++++++++----
arch/x86/include/asm/percpu.h | 27 +++++++++++++++++++++++++++
arch/x86/kernel/apic/apic.c | 2 +-
arch/x86/kernel/process.c | 4 ++--
arch/x86/kernel/smpboot.c | 4 ++--
5 files changed, 41 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/x86/include/asm/cpufeature.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/cpufeature.h 2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/cpufeature.h 2010-12-15 12:54:48.000000000 -0600
@@ -206,8 +206,7 @@ extern const char * const x86_power_flag
#define test_cpu_cap(c, bit) \
test_bit(bit, (unsigned long *)((c)->x86_capability))

-#define cpu_has(c, bit) \
- (__builtin_constant_p(bit) && \
+#define REQUIRED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
@@ -217,10 +216,16 @@ extern const char * const x86_power_flag
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
- (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \
- ? 1 : \
+ (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))

+/*
+ * Test feature @bit on the current cpu.  A compile-time constant @bit that
+ * is set in the REQUIRED_MASK words short-circuits to 1; anything else is
+ * looked up in cpu_info.x86_capability via this_cpu_test_bit().
+ */
+#define this_cpu_has(bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+ this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)

#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
Index: linux-2.6/arch/x86/kernel/apic/apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/apic.c 2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/kernel/apic/apic.c 2010-12-15 12:38:53.000000000 -0600
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(v
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);

- if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
+ if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
/* Make LAPIC timer preferrable over percpu HPET */
lapic_clockevent.rating = 150;
Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h 2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h 2010-12-15 13:06:27.000000000 -0600
@@ -545,6 +545,33 @@ do { \
old__; \
})

+/*
+ * Constant-bit variant of this_cpu_test_bit(): locate the word of the
+ * per-cpu bitmap at @addr that holds bit @nr, read it with
+ * percpu_read_stable() and mask out the bit.
+ *
+ * Returns 1 if the bit is set, 0 otherwise.
+ */
+static __always_inline int this_cpu_constant_test_bit(unsigned int nr,
+			const unsigned long __percpu *addr)
+{
+	unsigned long __percpu *word = (unsigned long *)addr + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+
+	return (percpu_read_stable(*word) & mask) != 0;
+}
+
+/*
+ * Variable-bit variant of this_cpu_test_bit(): test bit @nr of the per-cpu
+ * bitmap at @addr with a single bt instruction.
+ *
+ * The per-cpu segment prefix has to be applied to the memory operand (%1,
+ * the bitmap), not to the bit number (%2).  In AT&T order that is
+ * "bt <bit>,<bitmap>".  The following "sbb %0,%0" copies the carry flag
+ * set by bt into the result.
+ *
+ * Returns 0 if the bit is clear, -1 (all ones) if it is set.
+ */
+static inline int this_cpu_variable_test_bit(int nr,
+			const unsigned long __percpu *addr)
+{
+	int oldbit;
+
+	asm volatile("bt %2,"__percpu_arg(1)"\n\t"
+			"sbb %0,%0"
+			: "=r" (oldbit)
+			: "m" (*(unsigned long *)addr), "Ir" (nr));
+
+	return oldbit;
+}
+
+/*
+ * Test bit @nr in the per-cpu bitmap @addr.  Dispatches on
+ * __builtin_constant_p(nr) to this_cpu_constant_test_bit() or
+ * this_cpu_variable_test_bit().
+ */
+#define this_cpu_test_bit(nr, addr) \
+ (__builtin_constant_p((nr)) \
+ ? this_cpu_constant_test_bit((nr), (addr)) \
+ : this_cpu_variable_test_bit((nr), (addr)))
+
+
#include <asm-generic/percpu.h>

/* We can use this directly for local CPU (faster). */
Index: linux-2.6/arch/x86/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/process.c 2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/kernel/process.c 2010-12-15 12:38:53.000000000 -0600
@@ -445,7 +445,7 @@ void mwait_idle_with_hints(unsigned long
{
trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
if (!need_resched()) {
- if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+ if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);

__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -460,7 +460,7 @@ static void mwait_idle(void)
{
if (!need_resched()) {
trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+ if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);

__monitor((void *)&current_thread_info()->flags, 0, 0);
Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c 2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/kernel/smpboot.c 2010-12-15 12:38:53.000000000 -0600
@@ -1397,9 +1397,9 @@ static inline void mwait_play_dead(void)
int i;
void *mwait_ptr;

- if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
+ if (!this_cpu_has(X86_FEATURE_MWAIT))
return;
- if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+ if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;
if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
return;


Subject: x86: Avoid passing struct cpuinfo pointer to mce_available


Subject: x86: Avoid passing struct cpuinfo pointer to mce_available

If we do not pass the pointer to cpuinfo to mce_available() then it is possible
to use this_cpu_has().

There are two use cases of mce_available: One with the current processor
and one with the boot cpu. Define a function for both cases. However, there
is only one case in which boot_mce_available is used. If we somehow can
get rid of that then the patch could be simplified.

Signed-off-by: Christoph Lameter <[email protected]>

---
arch/x86/include/asm/mce.h | 3 +-
arch/x86/kernel/cpu/mcheck/mce.c | 41 +++++++++++++++++++--------------
arch/x86/kernel/cpu/mcheck/mce_intel.c | 2 -
3 files changed, 27 insertions(+), 19 deletions(-)

Index: linux-2.6/arch/x86/include/asm/mce.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/mce.h 2010-12-15 13:25:37.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/mce.h 2010-12-15 13:25:57.000000000 -0600
@@ -177,7 +177,8 @@ void mce_amd_feature_init(struct cpuinfo
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif

-int mce_available(struct cpuinfo_x86 *c);
+int this_cpu_mce_available(void);
+int boot_mce_available(void);

DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
Index: linux-2.6/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mcheck/mce.c 2010-12-15 13:20:48.000000000 -0600
+++ linux-2.6/arch/x86/kernel/cpu/mcheck/mce.c 2010-12-15 13:33:19.000000000 -0600
@@ -434,11 +434,19 @@ static int mce_ring_add(unsigned long pf
return 0;
}

-int mce_available(struct cpuinfo_x86 *c)
+int this_cpu_mce_available(void)
{
if (mce_disabled)
return 0;
- return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+ return this_cpu_has(X86_FEATURE_MCE) && this_cpu_has(X86_FEATURE_MCA);
+}
+
+/*
+ * Boot-cpu counterpart of this_cpu_mce_available(): checks the feature
+ * flags in boot_cpu_data rather than those of the current cpu.
+ *
+ * Takes no argument, matching the prototype in asm/mce.h and the callers.
+ * Returns 0 when mce_disabled is set or when either X86_FEATURE_MCE or
+ * X86_FEATURE_MCA is missing, non-zero otherwise.
+ */
+int boot_mce_available(void)
+{
+	if (mce_disabled)
+		return 0;
+	return boot_cpu_has(X86_FEATURE_MCE) &&
+		boot_cpu_has(X86_FEATURE_MCA);
}

static void mce_schedule_work(void)
@@ -1159,7 +1167,7 @@ static void mce_start_timer(unsigned lon

WARN_ON(smp_processor_id() != data);

- if (mce_available(__this_cpu_ptr(&cpu_info))) {
+ if (this_cpu_mce_available()) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
}
@@ -1373,9 +1381,9 @@ static int __cpuinit __mcheck_cpu_apply_

static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
- if (c->x86 != 5)
+ if (this_cpu_read(cpu_info.x86) != 5)
return;
- switch (c->x86_vendor) {
+ switch (this_cpu_read(cpu_info.x86_vendor)) {
case X86_VENDOR_INTEL:
intel_p5_mcheck_init(c);
break;
@@ -1402,17 +1410,16 @@ static void __mcheck_cpu_init_vendor(str
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
- int *n = &__get_cpu_var(mce_next_interval);

setup_timer(t, mce_start_timer, smp_processor_id());

if (mce_ignore_ce)
return;

- *n = check_interval * HZ;
- if (!*n)
+ this_cpu_write(mce_next_interval, check_interval * HZ);
+ if (!this_cpu_read(mce_next_interval))
return;
- t->expires = round_jiffies(jiffies + *n);
+ t->expires = round_jiffies(jiffies + this_cpu_read(mce_next_interval));
add_timer_on(t, smp_processor_id());
}

@@ -1438,7 +1445,7 @@ void __cpuinit mcheck_cpu_init(struct cp

__mcheck_cpu_ancient_init(c);

- if (!mce_available(c))
+ if (!this_cpu_mce_available())
return;

if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
@@ -1775,7 +1782,7 @@ static int mce_resume(struct sys_device
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!this_cpu_mce_available())
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
@@ -1790,7 +1797,7 @@ static void mce_restart(void)
/* Toggle features for corrected errors */
static void mce_disable_ce(void *all)
{
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!this_cpu_mce_available())
return;
if (all)
del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1806,7 @@ static void mce_disable_ce(void *all)

static void mce_enable_ce(void *all)
{
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!this_cpu_mce_available())
return;
cmci_reenable();
cmci_recheck();
@@ -1962,7 +1969,7 @@ static __cpuinit int mce_create_device(u
int err;
int i, j;

- if (!mce_available(&boot_cpu_data))
+ if (!boot_mce_available())
return -EIO;

memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
@@ -2022,7 +2029,7 @@ static void __cpuinit mce_disable_cpu(vo
unsigned long action = *(unsigned long *)h;
int i;

- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!this_cpu_mce_available())
return;

if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2047,7 @@ static void __cpuinit mce_reenable_cpu(v
unsigned long action = *(unsigned long *)h;
int i;

- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!this_cpu_mce_available())
return;

if (!(action & CPU_TASKS_FROZEN))
@@ -2122,7 +2129,7 @@ static __init int mcheck_init_device(voi
int err;
int i = 0;

- if (!mce_available(&boot_cpu_data))
+ if (!boot_mce_available())
return -EIO;

zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
Index: linux-2.6/arch/x86/kernel/cpu/mcheck/mce_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c 2010-12-15 13:24:41.000000000 -0600
+++ linux-2.6/arch/x86/kernel/cpu/mcheck/mce_intel.c 2010-12-15 13:25:23.000000000 -0600
@@ -130,7 +130,7 @@ void cmci_recheck(void)
unsigned long flags;
int banks;

- if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
+ if (!this_cpu_mce_available() || !cmci_supported(&banks))
return;
local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));

2010-12-15 20:57:03

by Andrew Morton

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On Wed, 15 Dec 2010 14:07:39 -0600 (CST)
Christoph Lameter <[email protected]> wrote:

> +#define cpu_has(c, bit) \
> + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
> test_cpu_cap(c, bit))
>
> +#define this_cpu_has(bit) \
> + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
> + this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
> +

Isn't

a ? 1 : b

a complex way of writing

a || b

?

2010-12-15 21:04:47

by H. Peter Anvin

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On 12/15/2010 12:56 PM, Andrew Morton wrote:
> On Wed, 15 Dec 2010 14:07:39 -0600 (CST)
> Christoph Lameter <[email protected]> wrote:
>
>> +#define cpu_has(c, bit) \
>> + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
>> test_cpu_cap(c, bit))
>>
>> +#define this_cpu_has(bit) \
>> + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
>> + this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
>> +
>
> Isn't
>
> a ? 1 : b
>
> a complex way of writing
>
> a || b
>

Not if b is not a bool.

-hpa

2010-12-15 21:30:10

by Miguel Ojeda

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On Wed, Dec 15, 2010 at 10:03 PM, H. Peter Anvin <[email protected]> wrote:
> On 12/15/2010 12:56 PM, Andrew Morton wrote:
>> On Wed, 15 Dec 2010 14:07:39 -0600 (CST)
>> Christoph Lameter <[email protected]> wrote:
>>
>>> +#define cpu_has(c, bit) \
>>> + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
>>> test_cpu_cap(c, bit))
>>>
>>> +#define this_cpu_has(bit) \
>>> + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
>>> + this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
>>> +
>>
>> Isn't
>>
>> a ? 1 : b
>>
>> a complex way of writing
>>
>> a || b
>>
>
> Not if b is not a bool.
>

In this case it this_cpu_*_test_bit() return an int, but they act as a
bool and are used in if()s; where is the catch?

> -hpa
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2010-12-15 21:40:33

by H. Peter Anvin

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On 12/15/2010 01:30 PM, Miguel Ojeda wrote:
>
> In this case it this_cpu_*_test_bit() return an int, but they act as a
> bool and are used in if()s; where is the catch?
>

If they aren't, and are stored in a variable for whatever reason, then
the || form will generate additional instructions to booleanize the
value for no good reason.

-hpa

2010-12-15 21:48:36

by Miguel Ojeda

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On Wed, Dec 15, 2010 at 10:39 PM, H. Peter Anvin <[email protected]> wrote:
> On 12/15/2010 01:30 PM, Miguel Ojeda wrote:
>>
>> In this case it this_cpu_*_test_bit() return an int, but they act as a
>> bool and are used in if()s; where is the catch?
>>
>
> If they aren't, and are stored in a variable for whatever reason, then
> the || form will generate additional instructions to booleanize the
> value for no good reason.

Thanks! I suppose that is the 't' of being "a fast way" ;-)

>
> -hpa
>

2010-12-16 06:33:56

by Miles Bader

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

"H. Peter Anvin" <[email protected]> writes:
>> In this case it this_cpu_*_test_bit() return an int, but they act as a
>> bool and are used in if()s; where is the catch?
>
> If they aren't, and are stored in a variable for whatever reason, then
> the || form will generate additional instructions to booleanize the
> value for no good reason.

It doesn't actually have to "booleanize" the value if it's used in a
boolean context though (and, AFAICT, usually won't).

My vague impression is that when used in a boolean context, gcc will
often generate the same or "equivalent" code for both variants -- but
sometimes a||b seems to generate better code; e.g.:

static inline int test1a (int a, int b) { return a ? 1 : b; }
int test1b (int a, int b) { if (test1a (a,b)) return a+b; else return 37; }

static inline int test2a (int a, int b) { return a || b; }
int test2b (int a, int b) { if (test2a (a,b)) return a+b; else return 37; }

=>

test1b:
testl %edi, %edi
jne .L2
movl $37, %eax
testl %esi, %esi
jne .L2
rep
ret
.L2:
leal (%rsi,%rdi), %eax
ret

test2b:
leal (%rsi,%rdi), %edx
movl $37, %eax
orl %edi, %esi
cmovne %edx, %eax
ret

.ident "GCC: (Debian 4.5.1-8) 4.5.1"


-Miles

--
Is it true that nothing can be known? If so how do we know this? -Woody Allen

2010-12-16 10:17:27

by Miguel Ojeda

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On Thu, Dec 16, 2010 at 7:25 AM, Miles Bader <[email protected]> wrote:
> "H. Peter Anvin" <[email protected]> writes:
>>> In this case it this_cpu_*_test_bit() return an int, but they act as a
>>> bool and are used in if()s; where is the catch?
>>
>> If they aren't, and are stored in a variable for whatever reason, then
>> the || form will generate additional instructions to booleanize the
>> value for no good reason.
>
> It doesn't actually have to "booleanize" the value if it's used in a
> boolean context though (and, AFAICT, usually won't).
>
> My vague impression is that when used in a boolean context, gcc will
> often generate the same or "equivalent" code for both variants -- but
> sometimes a||b seems to generate better code; e.g.:
>
> static inline int test1a (int a, int b) { return a ? 1 : b; }
> int test1b (int a, int b) { if (test1a (a,b)) return a+b; else return 37; }
>
> static inline int test2a (int a, int b) { return a || b; }
> int test2b (int a, int b) { if (test2a (a,b)) return a+b; else return 37; }
>

I think hpa was talking about some code where gcc can not optimize out
the assignment (e.g. volatile, complex code, using the int outside
conditional expressions, etc.).

>=>
>
> test1b:
> testl %edi, %edi
> jne .L2
> movl $37, %eax
> testl %esi, %esi
> jne .L2
> rep
> ret
> .L2:
> leal (%rsi,%rdi), %eax
> ret
>
> test2b:
> leal (%rsi,%rdi), %edx
> movl $37, %eax
> orl %edi, %esi
> cmovne %edx, %eax
> ret
>
> .ident "GCC: (Debian 4.5.1-8) 4.5.1"
>
>
> -Miles
>
> --
> Is it true that nothing can be known? If so how do we know this? -Woody Allen
>

2010-12-16 10:37:36

by Miles Bader

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

Miguel Ojeda <[email protected]> writes:
>>> If they aren't, and are stored in a variable for whatever reason, then
>>> the || form will generate additional instructions to booleanize the
>>> value for no good reason.
>
> I think hpa was talking about some code where gcc can not optimize out
> the assignment (e.g. volatile, complex code, using the int outside
> conditional expressions, etc.).

Sure, but that seems to assume that the alternatives are otherwise
equivalent in the common case, when used in a boolean context.

If that's not true then one risks pessimizing the common case to make an
uncommon case more efficient.

-Miles

--
Suburbia: where they tear out the trees and then name streets after them.

2010-12-16 15:39:32

by H. Peter Anvin

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

On 12/16/2010 02:29 AM, Miles Bader wrote:
> Miguel Ojeda <[email protected]> writes:
>>>> If they aren't, and are stored in a variable for whatever reason, then
>>>> the || form will generate additional instructions to booleanize the
>>>> value for no good reason.
>>
>> I think hpa was talking about some code where gcc can not optimize out
>> the assignment (e.g. volatile, complex code, using the int outside
>> conditional expressions, etc.).
>
> Sure, but that seems to assume that the alternatives are otherwise
> equivalent in the common case, when used in a boolean context.
>
> If that's not true then one risks pessimizing the common case to make an
> uncommon case more efficient.
>

The alternatives are equivalent when used in the common context. Your
examples are bogus, because they don't account for the
__builtin_constant_p().

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.

2010-12-17 04:28:58

by Miles Bader

[permalink] [raw]
Subject: Re: x86: A fast way to check capabilities of the current cpu

"H. Peter Anvin" <[email protected]> writes:
> The alternatives are equivalent when used in the common context. Your
> examples are bogus, because they don't account for the
> __builtin_constant_p().

Ah, true ... :}

-miles

--
Abstainer, n. A weak person who yields to the temptation of denying himself a
pleasure. A total abstainer is one who abstains from everything but
abstention, and especially from inactivity in the affairs of others.