2015-07-26 00:16:05

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH] x86: Introduce ASM flags to bitops

From: Uros Bizjak <[email protected]>

This patch introduces GCC ASM flags to bitops. Instead of e.g.

136d7: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
136de: 00
136df: 19 ff sbb %edi,%edi
136e1: 85 ff test %edi,%edi
136e3: 0f 95 c0 setne %al

the following code is generated:

13767: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
1376e: 00
1376f: 0f 92 c0 setb %al

A similar improvement can be seen in the following code:

7a6c: 48 0f a3 11 bt %rdx,(%rcx)
7a70: 19 d2 sbb %edx,%edx
7a72: 85 d2 test %edx,%edx
7a74: 74 eb je 7a61

which becomes:

7a8c: 48 0f a3 11 bt %rdx,(%rcx)
7a90: 73 ef jae 7a81

Signed-off-by: Uros Bizjak <[email protected]>
---
arch/x86/include/asm/bitops.h | 26 ++++++++++++++++++++++++--
arch/x86/include/asm/percpu.h | 18 +++++++++++++++++-
arch/x86/include/asm/signal.h | 6 ++++++
arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b95..fb64933 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -232,10 +232,16 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm("bts %2,%1"
+ : "=@ccc" (oldbit), ADDR
+ : "Ir" (nr));
+#else
asm("bts %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit), ADDR
: "Ir" (nr));
+#endif
return oldbit;
}

@@ -272,10 +278,16 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("btr %2,%1"
+ : "=@ccc" (oldbit), ADDR
+ : "Ir" (nr));
+#else
asm volatile("btr %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit), ADDR
: "Ir" (nr));
+#endif
return oldbit;
}

@@ -284,11 +296,16 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("btc %2,%1"
+ : "=@ccc" (oldbit), ADDR
+ : "Ir" (nr) : "memory");
+#else
asm volatile("btc %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit), ADDR
: "Ir" (nr) : "memory");
-
+#endif
return oldbit;
}

@@ -315,11 +332,16 @@ static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("bt %2,%1"
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr));
+#else
asm volatile("bt %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
-
+#endif
return oldbit;
}

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e0ba66c..0f8302c 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -508,6 +508,16 @@ do { \
#endif

/* This is not atomic against other CPUs -- CPU preemption needs to be off */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+#define x86_test_and_clear_bit_percpu(bit, var) \
+({ \
+ int old__; \
+ asm volatile("btr %2,"__percpu_arg(1) \
+ : "=@ccc" (old__), "+m" (var) \
+ : "dIr" (bit)); \
+ old__; \
+})
+#else
#define x86_test_and_clear_bit_percpu(bit, var) \
({ \
int old__; \
@@ -516,6 +526,7 @@ do { \
: "dIr" (bit)); \
old__; \
})
+#endif

static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
const unsigned long __percpu *addr)
@@ -534,11 +545,16 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("bt "__percpu_arg(2)",%1"
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr));
+#else
asm volatile("bt "__percpu_arg(2)",%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
-
+#endif
return oldbit;
}

diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 31eab86..0e35376 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -82,8 +82,14 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
int ret;
+
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm("btl %2,%1"
+ : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1));
+#else
asm("btl %2,%1\n\tsbbl %0,%0"
: "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+#endif
return ret;
}

diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index f28a24b..b690992 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -81,9 +81,15 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("lock; bts %2,%1"
+ : "=@ccc" (oldbit), "+m" (ADDR)
+ : "Ir" (nr) : "memory");
+#else
asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "+m" (ADDR)
: "Ir" (nr) : "memory");
+#endif
return oldbit;
}

@@ -99,9 +105,15 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("lock; btr %2,%1"
+ : "=@ccc" (oldbit), "+m" (ADDR)
+ : "Ir" (nr) : "memory");
+#else
asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "+m" (ADDR)
: "Ir" (nr) : "memory");
+#endif
return oldbit;
}

@@ -117,9 +129,15 @@ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr
{
int oldbit;

+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ asm volatile("lock; btc %2,%1"
+ : "=@ccc" (oldbit), "+m" (ADDR)
+ : "Ir" (nr) : "memory");
+#else
asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "+m" (ADDR)
: "Ir" (nr) : "memory");
+#endif
return oldbit;
}

--
2.4.3


2015-07-27 15:40:43

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] x86: Introduce ASM flags to bitops


* Uros Bizjak <[email protected]> wrote:

> From: Uros Bizjak <[email protected]>
>
> This patch introduces GCC ASM flags to bitops. Instead of e.g.
>
> 136d7: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
> 136de: 00
> 136df: 19 ff sbb %edi,%edi
> 136e1: 85 ff test %edi,%edi
> 136e3: 0f 95 c0 setne %al
>
> following code is generated:
>
> 13767: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
> 1376e: 00
> 1376f: 0f 92 c0 setb %al
>
> Similar improvement can be seen in following code:
>
> 7a6c: 48 0f a3 11 bt %rdx,(%rcx)
> 7a70: 19 d2 sbb %edx,%edx
> 7a72: 85 d2 test %edx,%edx
> 7a74: 74 eb je 7a61
>
> which becomes:
>
> 7a8c: 48 0f a3 11 bt %rdx,(%rcx)
> 7a90: 73 ef jae 7a81
>
> Signed-off-by: Uros Bizjak <[email protected]>
> ---
> arch/x86/include/asm/bitops.h | 26 ++++++++++++++++++++++++--
> arch/x86/include/asm/percpu.h | 18 +++++++++++++++++-
> arch/x86/include/asm/signal.h | 6 ++++++
> arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
> 4 files changed, 65 insertions(+), 3 deletions(-)

Nothing in your patch seems to be setting __GCC_ASM_FLAG_OUTPUTS__, and the patch
does not seem to be mailed as part of a larger series ...

So how is this supposed to work?

Thanks,

Ingo

2015-07-27 15:50:15

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH] x86: Introduce ASM flags to bitops

On Mon, Jul 27, 2015 at 5:40 PM, Ingo Molnar <[email protected]> wrote:
>
> * Uros Bizjak <[email protected]> wrote:
>
>> From: Uros Bizjak <[email protected]>
>>
>> This patch introduces GCC ASM flags to bitops. Instead of e.g.
>>
>> 136d7: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
>> 136de: 00
>> 136df: 19 ff sbb %edi,%edi
>> 136e1: 85 ff test %edi,%edi
>> 136e3: 0f 95 c0 setne %al
>>
>> following code is generated:
>>
>> 13767: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
>> 1376e: 00
>> 1376f: 0f 92 c0 setb %al
>>
>> Similar improvement can be seen in following code:
>>
>> 7a6c: 48 0f a3 11 bt %rdx,(%rcx)
>> 7a70: 19 d2 sbb %edx,%edx
>> 7a72: 85 d2 test %edx,%edx
>> 7a74: 74 eb je 7a61
>>
>> which becomes:
>>
>> 7a8c: 48 0f a3 11 bt %rdx,(%rcx)
>> 7a90: 73 ef jae 7a81
>>
>> Signed-off-by: Uros Bizjak <[email protected]>
>> ---
>> arch/x86/include/asm/bitops.h | 26 ++++++++++++++++++++++++--
>> arch/x86/include/asm/percpu.h | 18 +++++++++++++++++-
>> arch/x86/include/asm/signal.h | 6 ++++++
>> arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
>> 4 files changed, 65 insertions(+), 3 deletions(-)
>
> Nothing in your patch seems to be setting __GCC_ASM_FLAG_OUTPUTS__, and the patch
> does not seem to be mailed as part of a larger series ...
>
> So how is this supposed to work?

GCC version 6+ will automatically define __GCC_ASM_FLAG_OUTPUTS__ when
this feature is supported. Please see [1] for the RFC GCC patch series and
[2] for the final committed patch.

[1] https://gcc.gnu.org/ml/gcc-patches/2015-05/msg00594.html
[2] https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02087.html

Uros.

2015-07-27 15:54:49

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] x86: Introduce ASM flags to bitops


* Uros Bizjak <[email protected]> wrote:

> On Mon, Jul 27, 2015 at 5:40 PM, Ingo Molnar <[email protected]> wrote:
> >
> > * Uros Bizjak <[email protected]> wrote:
> >
> >> From: Uros Bizjak <[email protected]>
> >>
> >> This patch introduces GCC ASM flags to bitops. Instead of e.g.
> >>
> >> 136d7: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
> >> 136de: 00
> >> 136df: 19 ff sbb %edi,%edi
> >> 136e1: 85 ff test %edi,%edi
> >> 136e3: 0f 95 c0 setne %al
> >>
> >> following code is generated:
> >>
> >> 13767: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
> >> 1376e: 00
> >> 1376f: 0f 92 c0 setb %al
> >>
> >> Similar improvement can be seen in following code:
> >>
> >> 7a6c: 48 0f a3 11 bt %rdx,(%rcx)
> >> 7a70: 19 d2 sbb %edx,%edx
> >> 7a72: 85 d2 test %edx,%edx
> >> 7a74: 74 eb je 7a61
> >>
> >> which becomes:
> >>
> >> 7a8c: 48 0f a3 11 bt %rdx,(%rcx)
> >> 7a90: 73 ef jae 7a81
> >>
> >> Signed-off-by: Uros Bizjak <[email protected]>
> >> ---
> >> arch/x86/include/asm/bitops.h | 26 ++++++++++++++++++++++++--
> >> arch/x86/include/asm/percpu.h | 18 +++++++++++++++++-
> >> arch/x86/include/asm/signal.h | 6 ++++++
> >> arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
> >> 4 files changed, 65 insertions(+), 3 deletions(-)
> >
> > Nothing in your patch seems to be setting __GCC_ASM_FLAG_OUTPUTS__, and the patch
> > does not seem to be mailed as part of a larger series ...
> >
> > So how is this supposed to work?
>
> GCC version 6+ will automatically define __GCC_ASM_FLAG_OUTPUTS__ when
> this feature is supported. Please see [1] for RFC GCC patch series and
> [2] for final committed patch.
>
> [1] https://gcc.gnu.org/ml/gcc-patches/2015-05/msg00594.html
> [2] https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02087.html

Ok, great. This information should be part of the changelog and such, as it's not
obvious.

Does the GCC project treat this as an ABI kind of thing, i.e. can the kernel rely
on it from now on, without the GCC-side semantics of this feature ever changing
and breaking the kernel?

Thanks,

Ingo

2015-07-27 15:59:04

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH] x86: Introduce ASM flags to bitops

On Mon, Jul 27, 2015 at 5:54 PM, Ingo Molnar <[email protected]> wrote:
>
> * Uros Bizjak <[email protected]> wrote:
>
>> On Mon, Jul 27, 2015 at 5:40 PM, Ingo Molnar <[email protected]> wrote:
>> >
>> > * Uros Bizjak <[email protected]> wrote:
>> >
>> >> From: Uros Bizjak <[email protected]>
>> >>
>> >> This patch introduces GCC ASM flags to bitops. Instead of e.g.
>> >>
>> >> 136d7: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
>> >> 136de: 00
>> >> 136df: 19 ff sbb %edi,%edi
>> >> 136e1: 85 ff test %edi,%edi
>> >> 136e3: 0f 95 c0 setne %al
>> >>
>> >> following code is generated:
>> >>
>> >> 13767: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
>> >> 1376e: 00
>> >> 1376f: 0f 92 c0 setb %al
>> >>
>> >> Similar improvement can be seen in following code:
>> >>
>> >> 7a6c: 48 0f a3 11 bt %rdx,(%rcx)
>> >> 7a70: 19 d2 sbb %edx,%edx
>> >> 7a72: 85 d2 test %edx,%edx
>> >> 7a74: 74 eb je 7a61
>> >>
>> >> which becomes:
>> >>
>> >> 7a8c: 48 0f a3 11 bt %rdx,(%rcx)
>> >> 7a90: 73 ef jae 7a81
>> >>
>> >> Signed-off-by: Uros Bizjak <[email protected]>
>> >> ---
>> >> arch/x86/include/asm/bitops.h | 26 ++++++++++++++++++++++++--
>> >> arch/x86/include/asm/percpu.h | 18 +++++++++++++++++-
>> >> arch/x86/include/asm/signal.h | 6 ++++++
>> >> arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
>> >> 4 files changed, 65 insertions(+), 3 deletions(-)
>> >
>> > Nothing in your patch seems to be setting __GCC_ASM_FLAG_OUTPUTS__, and the patch
>> > does not seem to be mailed as part of a larger series ...
>> >
>> > So how is this supposed to work?
>>
>> GCC version 6+ will automatically define __GCC_ASM_FLAG_OUTPUTS__ when
>> this feature is supported. Please see [1] for RFC GCC patch series and
>> [2] for final committed patch.
>>
>> [1] https://gcc.gnu.org/ml/gcc-patches/2015-05/msg00594.html
>> [2] https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02087.html
>
> Ok, great. This information should be part of the changelog and such, as it's not
> obvious.

No problem, I'll add this information and send a v2 patch.

> Does the GCC project treat this as an ABI kind of thing, i.e. can the kernel rely
> on it from now on, without the GCC side semantics of this feature not ever
> changing and breaking the kernel?

Yes. It was discussed and agreed between GCC and kernel people (HPA)
on the GCC mailing list. Please see [3].

[3] https://gcc.gnu.org/ml/gcc-patches/2015-05/msg00725.html

Uros.

2015-07-27 16:13:45

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] x86: Introduce ASM flags to bitops


* Uros Bizjak <[email protected]> wrote:

> On Mon, Jul 27, 2015 at 5:54 PM, Ingo Molnar <[email protected]> wrote:
> >
> > * Uros Bizjak <[email protected]> wrote:
> >
> >> On Mon, Jul 27, 2015 at 5:40 PM, Ingo Molnar <[email protected]> wrote:
> >> >
> >> > * Uros Bizjak <[email protected]> wrote:
> >> >
> >> >> From: Uros Bizjak <[email protected]>
> >> >>
> >> >> This patch introduces GCC ASM flags to bitops. Instead of e.g.
> >> >>
> >> >> 136d7: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
> >> >> 136de: 00
> >> >> 136df: 19 ff sbb %edi,%edi
> >> >> 136e1: 85 ff test %edi,%edi
> >> >> 136e3: 0f 95 c0 setne %al
> >> >>
> >> >> following code is generated:
> >> >>
> >> >> 13767: 48 0f a3 3d 00 00 00 bt %rdi,0x0(%rip)
> >> >> 1376e: 00
> >> >> 1376f: 0f 92 c0 setb %al
> >> >>
> >> >> Similar improvement can be seen in following code:
> >> >>
> >> >> 7a6c: 48 0f a3 11 bt %rdx,(%rcx)
> >> >> 7a70: 19 d2 sbb %edx,%edx
> >> >> 7a72: 85 d2 test %edx,%edx
> >> >> 7a74: 74 eb je 7a61
> >> >>
> >> >> which becomes:
> >> >>
> >> >> 7a8c: 48 0f a3 11 bt %rdx,(%rcx)
> >> >> 7a90: 73 ef jae 7a81
> >> >>
> >> >> Signed-off-by: Uros Bizjak <[email protected]>
> >> >> ---
> >> >> arch/x86/include/asm/bitops.h | 26 ++++++++++++++++++++++++--
> >> >> arch/x86/include/asm/percpu.h | 18 +++++++++++++++++-
> >> >> arch/x86/include/asm/signal.h | 6 ++++++
> >> >> arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
> >> >> 4 files changed, 65 insertions(+), 3 deletions(-)
> >> >
> >> > Nothing in your patch seems to be setting __GCC_ASM_FLAG_OUTPUTS__, and the patch
> >> > does not seem to be mailed as part of a larger series ...
> >> >
> >> > So how is this supposed to work?
> >>
> >> GCC version 6+ will automatically define __GCC_ASM_FLAG_OUTPUTS__ when
> >> this feature is supported. Please see [1] for RFC GCC patch series and
> >> [2] for final committed patch.
> >>
> >> [1] https://gcc.gnu.org/ml/gcc-patches/2015-05/msg00594.html
> >> [2] https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02087.html
> >
> > Ok, great. This information should be part of the changelog and such, as it's not
> > obvious.
>
> No problem, I'll add this information and send a v2 patch.
>
> > Does the GCC project treat this as an ABI kind of thing, i.e. can the kernel rely
> > on it from now on, without the GCC side semantics of this feature not ever
> > changing and breaking the kernel?
>
> Yes. It was discussed and agreed between GCC and kernel people (HPA)
> on GCC mailing list. Please see [3].
>
> [3] https://gcc.gnu.org/ml/gcc-patches/2015-05/msg00725.html

Ok, great!

One more request: please make it similar to how we handle other, similar compiler
features and introduce an interim macro like:

#ifdef __GCC_ASM_FLAG_OUTPUTS__
# define CC_HAVE_ASM_FLAG_OUTPUTS
#endif

in a suitable spot in include/linux/compiler-gcc.h, and use
CC_HAVE_ASM_FLAG_OUTPUTS in the other places.

This makes it easier to disable/enable this feature centrally, and it might enable
other compilers to introduce a similar feature.

Thanks,

Ingo