2011-02-24 18:44:49

by Stephen Boyd

Subject: [PATCH 0/4] SCM fixes and updates

These are a few updates to SCM. The first two patches fix some
bad code generation. The next patch saves a couple instructions
on the slow path and the final patch determines the cacheline
size dynamically instead of statically.

Stephen Boyd (4):
msm: scm: Mark inline asm as volatile
msm: scm: Fix improper register assignment
msm: scm: Check for interruption immediately
msm: scm: Get cacheline size from CTR

arch/arm/mach-msm/scm.c | 75 +++++++++++++++++++++++++++-------------------
1 files changed, 44 insertions(+), 31 deletions(-)

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.


2011-02-24 18:44:52

by Stephen Boyd

Subject: [PATCH 1/4] msm: scm: Mark inline asm as volatile

We don't want the compiler to remove these asm statements or
reorder them in any way. Mark them as volatile to be sure.

Signed-off-by: Stephen Boyd <[email protected]>
---
arch/arm/mach-msm/scm.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
index f4b9bc9..ba57b5a 100644
--- a/arch/arm/mach-msm/scm.c
+++ b/arch/arm/mach-msm/scm.c
@@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
register u32 r0 asm("r0") = 1;
register u32 r1 asm("r1") = (u32)&context_id;
register u32 r2 asm("r2") = cmd_addr;
- asm(
+ asm volatile(
__asmeq("%0", "r0")
__asmeq("%1", "r0")
__asmeq("%2", "r1")
@@ -271,7 +271,7 @@ u32 scm_get_version(void)
return version;

mutex_lock(&scm_lock);
- asm(
+ asm volatile(
__asmeq("%0", "r1")
__asmeq("%1", "r0")
__asmeq("%2", "r1")
--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-24 18:44:56

by Stephen Boyd

Subject: [PATCH 2/4] msm: scm: Fix improper register assignment

Assign the registers used in the inline assembly immediately
before the inline assembly block. This ensures the compiler
doesn't optimize away dead register assignments when it
shouldn't.

Signed-off-by: Stephen Boyd <[email protected]>
---
arch/arm/mach-msm/scm.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
index ba57b5a..5eddf54 100644
--- a/arch/arm/mach-msm/scm.c
+++ b/arch/arm/mach-msm/scm.c
@@ -264,13 +264,16 @@ u32 scm_get_version(void)
{
int context_id;
static u32 version = -1;
- register u32 r0 asm("r0") = 0x1 << 8;
- register u32 r1 asm("r1") = (u32)&context_id;
+ register u32 r0 asm("r0");
+ register u32 r1 asm("r1");

if (version != -1)
return version;

mutex_lock(&scm_lock);
+
+ r0 = 0x1 << 8;
+ r1 = (u32)&context_id;
asm volatile(
__asmeq("%0", "r1")
__asmeq("%1", "r0")
--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-24 18:45:19

by Stephen Boyd

Subject: [PATCH 4/4] msm: scm: Get cacheline size from CTR

Instead of hardcoding the cacheline size as 32, get the cacheline
size from the CTR register.

Signed-off-by: Stephen Boyd <[email protected]>
---
arch/arm/mach-msm/scm.c | 17 ++++++++++++-----
1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
index cfa808d..0528c71 100644
--- a/arch/arm/mach-msm/scm.c
+++ b/arch/arm/mach-msm/scm.c
@@ -26,9 +26,6 @@

#include "scm.h"

-/* Cache line size for msm8x60 */
-#define CACHELINESIZE 32
-
#define SCM_ENOMEM -5
#define SCM_EOPNOTSUPP -4
#define SCM_EINVAL_ADDR -3
@@ -207,6 +204,14 @@ static int __scm_call(const struct scm_command *cmd)
return ret;
}

+static inline u32 dcache_line_size(void)
+{
+ u32 ctr;
+
+ asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
+ return 4 << ((ctr >> 16) & 0xf);
+}
+
/**
* scm_call() - Send an SCM command
* @svc_id: service identifier
@@ -243,11 +248,13 @@ int scm_call(u32 svc_id, u32 cmd_id, const void *cmd_buf, size_t cmd_len,
do {
u32 start = (u32)rsp;
u32 end = (u32)scm_get_response_buffer(rsp) + resp_len;
- start &= ~(CACHELINESIZE - 1);
+ u32 cacheline_size = dcache_line_size();
+
+ start &= ~(cacheline_size - 1);
while (start < end) {
asm ("mcr p15, 0, %0, c7, c6, 1" : : "r" (start)
: "memory");
- start += CACHELINESIZE;
+ start += cacheline_size;
}
} while (!rsp->is_complete);

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-24 18:45:23

by Stephen Boyd

Subject: [PATCH 3/4] msm: scm: Check for interruption immediately

When we're interrupted on the secure side, we should just issue
another smc instruction instead of replaying the arguments to smc.
Fix it.

Signed-off-by: Stephen Boyd <[email protected]>
---
arch/arm/mach-msm/scm.c | 51 ++++++++++++++++++++++++----------------------
1 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
index 5eddf54..cfa808d 100644
--- a/arch/arm/mach-msm/scm.c
+++ b/arch/arm/mach-msm/scm.c
@@ -174,15 +174,18 @@ static u32 smc(u32 cmd_addr)
register u32 r0 asm("r0") = 1;
register u32 r1 asm("r1") = (u32)&context_id;
register u32 r2 asm("r2") = cmd_addr;
- asm volatile(
- __asmeq("%0", "r0")
- __asmeq("%1", "r0")
- __asmeq("%2", "r1")
- __asmeq("%3", "r2")
- "smc #0 @ switch to secure world\n"
- : "=r" (r0)
- : "r" (r0), "r" (r1), "r" (r2)
- : "r3");
+ do {
+ asm volatile(
+ __asmeq("%0", "r0")
+ __asmeq("%1", "r0")
+ __asmeq("%2", "r1")
+ __asmeq("%3", "r2")
+ "smc #0 @ switch to secure world\n"
+ : "=r" (r0)
+ : "r" (r0), "r" (r1), "r" (r2)
+ : "r3");
+ } while (r0 == SCM_INTERRUPTED);
+
return r0;
}

@@ -197,13 +200,9 @@ static int __scm_call(const struct scm_command *cmd)
* side in the buffer.
*/
flush_cache_all();
- do {
- ret = smc(cmd_addr);
- if (ret < 0) {
- ret = scm_remap_error(ret);
- break;
- }
- } while (ret == SCM_INTERRUPTED);
+ ret = smc(cmd_addr);
+ if (ret < 0)
+ ret = scm_remap_error(ret);

return ret;
}
@@ -274,14 +273,18 @@ u32 scm_get_version(void)

r0 = 0x1 << 8;
r1 = (u32)&context_id;
- asm volatile(
- __asmeq("%0", "r1")
- __asmeq("%1", "r0")
- __asmeq("%2", "r1")
- "smc #0 @ switch to secure world\n"
- : "=r" (r1)
- : "r" (r0), "r" (r1)
- : "r2", "r3");
+ do {
+ asm volatile(
+ __asmeq("%0", "r0")
+ __asmeq("%1", "r1")
+ __asmeq("%2", "r0")
+ __asmeq("%3", "r1")
+ "smc #0 @ switch to secure world\n"
+ : "=r" (r0), "=r" (r1)
+ : "r" (r0), "r" (r1)
+ : "r2", "r3");
+ } while (r0 == SCM_INTERRUPTED);
+
version = r1;
mutex_unlock(&scm_lock);

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-24 19:01:39

by Thomas Gleixner

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

On Thu, 24 Feb 2011, Stephen Boyd wrote:

> Instead of hardcoding the cacheline size as 32, get the cacheline
> size from the CTR register.
>
> Signed-off-by: Stephen Boyd <[email protected]>
> ---
> arch/arm/mach-msm/scm.c | 17 ++++++++++++-----
> 1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
> index cfa808d..0528c71 100644
> --- a/arch/arm/mach-msm/scm.c
> +++ b/arch/arm/mach-msm/scm.c
> @@ -26,9 +26,6 @@
>
> #include "scm.h"
>
> -/* Cache line size for msm8x60 */
> -#define CACHELINESIZE 32
> -
> #define SCM_ENOMEM -5
> #define SCM_EOPNOTSUPP -4
> #define SCM_EINVAL_ADDR -3
> @@ -207,6 +204,14 @@ static int __scm_call(const struct scm_command *cmd)
> return ret;
> }
>
> +static inline u32 dcache_line_size(void)
> +{
> + u32 ctr;
> +
> + asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
> + return 4 << ((ctr >> 16) & 0xf);
> +}
> +
> /**
> * scm_call() - Send an SCM command
> * @svc_id: service identifier
> @@ -243,11 +248,13 @@ int scm_call(u32 svc_id, u32 cmd_id, const void *cmd_buf, size_t cmd_len,
> do {
> u32 start = (u32)rsp;
> u32 end = (u32)scm_get_response_buffer(rsp) + resp_len;
> - start &= ~(CACHELINESIZE - 1);
> + u32 cacheline_size = dcache_line_size();

And why do you want to do that on every scm_call() invocation and on
every loop of that code? If your dcache_line_size() changes at
runtime, then you might have other problems.

Thanks,

tglx

2011-02-24 19:33:39

by Sergei Shtylyov

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

Hello.

Stephen Boyd wrote:

> Instead of hardcoding the cacheline size as 32, get the cacheline
> size from the CTR register.

> Signed-off-by: Stephen Boyd <[email protected]>
> ---
> arch/arm/mach-msm/scm.c | 17 ++++++++++++-----
> 1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
> index cfa808d..0528c71 100644
> --- a/arch/arm/mach-msm/scm.c
> +++ b/arch/arm/mach-msm/scm.c
[...]
> @@ -207,6 +204,14 @@ static int __scm_call(const struct scm_command *cmd)
> return ret;
> }
>
> +static inline u32 dcache_line_size(void)
> +{
> + u32 ctr;
> +
> + asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
> + return 4 << ((ctr >> 16) & 0xf);
> +}

Won't the generic cache_line_size() macro do instead? It's defined as
L1_CACHE_BYTES.

WBR, Sergei

2011-02-24 19:44:09

by Stephen Boyd

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

On 02/24/2011 11:01 AM, Thomas Gleixner wrote:
> On Thu, 24 Feb 2011, Stephen Boyd wrote:
>
>>
>> /**
>> * scm_call() - Send an SCM command
>> * @svc_id: service identifier
>> @@ -243,11 +248,13 @@ int scm_call(u32 svc_id, u32 cmd_id, const void *cmd_buf, size_t cmd_len,
>> do {
>> u32 start = (u32)rsp;
>> u32 end = (u32)scm_get_response_buffer(rsp) + resp_len;
>> - start &= ~(CACHELINESIZE - 1);
>> + u32 cacheline_size = dcache_line_size();
>
> And why do you want to do that on every scm_call() invocation and on
> every loop of that code? If your dcache_line_size() changes at
> runtime, then you might have other problems.

I definitely don't want to do it for every loop. I'm fine with getting
it every scm_call() invocation though.

For now, I'll pull the end and cacheline_size variables out of the
do-while loop.

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-24 19:50:51

by Stephen Boyd

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

On 02/24/2011 11:32 AM, Sergei Shtylyov wrote:
> Stephen Boyd wrote:
>
>> @@ -207,6 +204,14 @@ static int __scm_call(const struct scm_command
>> *cmd)
>> return ret;
>> }
>>
>> +static inline u32 dcache_line_size(void)
>> +{
>> + u32 ctr;
>> +
>> + asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
>> + return 4 << ((ctr >> 16) & 0xf);
>> +}
>
> Won't generic cache_line_size() macro do instead? It's defined as
> L1_CACHE_BYTES.
>

Interesting. It would be the same value (32), but I'm not sure how
multi-platform friendly that will be, since L1_CACHE_BYTES is (1 <<
CONFIG_ARM_L1_CACHE_SHIFT). I suppose we can punt on supporting
platforms with different cache line sizes in one kernel for another day.

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-24 19:55:48

by Russell King - ARM Linux

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

On Thu, Feb 24, 2011 at 10:32:06PM +0300, Sergei Shtylyov wrote:
> Won't the generic cache_line_size() macro do instead? It's defined as
> L1_CACHE_BYTES.

L1_CACHE_BYTES needs to be a compile time constant. As such it ends up
being defined to the largest cache line size for the range of CPUs built
into the kernel. This allows us to appropriately align data structures
to cache line boundaries which are boundaries for any of the CPUs which
are to be supported.

However, if you need to know exactly what cache line size you have for
doing things like cache maintenance, then you cannot use L1_CACHE_BYTES
or anything related to that.

One of the issues which complicates decoding the cache line size is that
on some CPUs, there's no way to read it. On later CPUs, there's the
cache type register, of which there are several different formats, which
makes decoding it rather painful and complicated. Then there's the
related issue as to which cache line size you want - L1 Dcache, L1
Icache, or L2 cache, or some other level of cache?

It's all rather messy.
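
For the v7-format CTR specifically, the decode is small enough to show
here. A minimal sketch (v7_dcache_line_size is a made-up name, and the
check assumes the ARMv7 CTR format field in bits [31:29] reads 0b100,
which earlier CPUs don't guarantee):

static int v7_dcache_line_size(u32 *size)
{
        u32 ctr;

        /* Read the Cache Type Register */
        asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
        if ((ctr >> 29) != 4)           /* not the ARMv7 CTR format */
                return -1;
        /* DminLine, bits [19:16]: log2 of the D-cache line size in words */
        *size = 4 << ((ctr >> 16) & 0xf);
        return 0;
}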

2011-02-24 19:57:00

by Thomas Gleixner

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

On Thu, 24 Feb 2011, Stephen Boyd wrote:

> On 02/24/2011 11:01 AM, Thomas Gleixner wrote:
> > On Thu, 24 Feb 2011, Stephen Boyd wrote:
> >
> >>
> >> /**
> >> * scm_call() - Send an SCM command
> >> * @svc_id: service identifier
> >> @@ -243,11 +248,13 @@ int scm_call(u32 svc_id, u32 cmd_id, const void *cmd_buf, size_t cmd_len,
> >> do {
> >> u32 start = (u32)rsp;
> >> u32 end = (u32)scm_get_response_buffer(rsp) + resp_len;
> >> - start &= ~(CACHELINESIZE - 1);
> >> + u32 cacheline_size = dcache_line_size();
> >
> > And why do you want to do that on every scm_call() invocation and on
> > every loop of that code? If your dcache_line_size() changes at
> > runtime, then you might have other problems.
>
> I definitely don't want to do it for every loop. I'm fine with getting
> it every scm_call() invocation though.
>
> For now, I'll pull the end and cacheline_size variables out of the
> do-while loop.

Why not do it correctly right away and retrieve it in an __init
function?

Thanks,

tglx

2011-02-25 12:18:07

by Will Deacon

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

Hi Stephen,

On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
> We don't want the compiler to remove these asm statements or
> reorder them in any way. Mark them as volatile to be sure.
>
> Signed-off-by: Stephen Boyd <[email protected]>
> ---
> arch/arm/mach-msm/scm.c | 4 ++--
> 1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
> index f4b9bc9..ba57b5a 100644
> --- a/arch/arm/mach-msm/scm.c
> +++ b/arch/arm/mach-msm/scm.c
> @@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
> register u32 r0 asm("r0") = 1;
> register u32 r1 asm("r1") = (u32)&context_id;
> register u32 r2 asm("r2") = cmd_addr;
> - asm(
> + asm volatile(
> __asmeq("%0", "r0")
> __asmeq("%1", "r0")
> __asmeq("%2", "r1")
> @@ -271,7 +271,7 @@ u32 scm_get_version(void)
> return version;
>
> mutex_lock(&scm_lock);
> - asm(
> + asm volatile(
> __asmeq("%0", "r1")
> __asmeq("%1", "r0")
> __asmeq("%2", "r1")


These asm blocks all have sensible looking output constraints. Why
do they need to be marked volatile?

Will

2011-02-25 13:23:48

by Will Deacon

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
> Assign the registers used in the inline assembly immediately
> before the inline assembly block. This ensures the compiler
> doesn't optimize away dead register assignments when it
> shouldn't.
>
> Signed-off-by: Stephen Boyd <[email protected]>
> ---
> arch/arm/mach-msm/scm.c | 7 +++++--
> 1 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
> index ba57b5a..5eddf54 100644
> --- a/arch/arm/mach-msm/scm.c
> +++ b/arch/arm/mach-msm/scm.c
> @@ -264,13 +264,16 @@ u32 scm_get_version(void)
> {
> int context_id;
> static u32 version = -1;
> - register u32 r0 asm("r0") = 0x1 << 8;
> - register u32 r1 asm("r1") = (u32)&context_id;
> + register u32 r0 asm("r0");
> + register u32 r1 asm("r1");
>
> if (version != -1)
> return version;
>
> mutex_lock(&scm_lock);
> +
> + r0 = 0x1 << 8;
> + r1 = (u32)&context_id;
> asm volatile(
> __asmeq("%0", "r1")
> __asmeq("%1", "r0")


Whoa, have you seen the compiler `optimise' the original assignments
away? Since there is a use in the asm block, the definition shouldn't
be omitted. What toolchain are you using?

Will

2011-02-25 19:05:32

by Stephen Boyd

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On 02/25/2011 03:56 AM, Will Deacon wrote:
> Hi Stephen,
>
> On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
>> We don't want the compiler to remove these asm statements or
>> reorder them in any way. Mark them as volatile to be sure.
>>
>> Signed-off-by: Stephen Boyd <[email protected]>
>> ---
>> arch/arm/mach-msm/scm.c | 4 ++--
>> 1 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
>> index f4b9bc9..ba57b5a 100644
>> --- a/arch/arm/mach-msm/scm.c
>> +++ b/arch/arm/mach-msm/scm.c
>> @@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
>> register u32 r0 asm("r0") = 1;
>> register u32 r1 asm("r1") = (u32)&context_id;
>> register u32 r2 asm("r2") = cmd_addr;
>> - asm(
>> + asm volatile(
>> __asmeq("%0", "r0")
>> __asmeq("%1", "r0")
>> __asmeq("%2", "r1")
>> @@ -271,7 +271,7 @@ u32 scm_get_version(void)
>> return version;
>>
>> mutex_lock(&scm_lock);
>> - asm(
>> + asm volatile(
>> __asmeq("%0", "r1")
>> __asmeq("%1", "r0")
>> __asmeq("%2", "r1")
>
>
> These asm blocks all have sensible looking output constraints. Why
> do they need to be marked volatile?

I didn't see any different code generated with or without this, so I
saw little harm in marking them as volatile. I really don't want the
compiler moving or deleting these statements, so it seemed safer to
just mark them volatile to make sure nothing happens to the smc
instructions.

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-25 19:22:21

by Stephen Boyd

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On 02/25/2011 05:23 AM, Will Deacon wrote:
> On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
>> Assign the registers used in the inline assembly immediately
>> before the inline assembly block. This ensures the compiler
>> doesn't optimize away dead register assignments when it
>> shouldn't.
>>
>> Signed-off-by: Stephen Boyd <[email protected]>
>> ---
>> arch/arm/mach-msm/scm.c | 7 +++++--
>> 1 files changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
>> index ba57b5a..5eddf54 100644
>> --- a/arch/arm/mach-msm/scm.c
>> +++ b/arch/arm/mach-msm/scm.c
>> @@ -264,13 +264,16 @@ u32 scm_get_version(void)
>> {
>> int context_id;
>> static u32 version = -1;
>> - register u32 r0 asm("r0") = 0x1 << 8;
>> - register u32 r1 asm("r1") = (u32)&context_id;
>> + register u32 r0 asm("r0");
>> + register u32 r1 asm("r1");
>>
>> if (version != -1)
>> return version;
>>
>> mutex_lock(&scm_lock);
>> +
>> + r0 = 0x1 << 8;
>> + r1 = (u32)&context_id;
>> asm volatile(
>> __asmeq("%0", "r1")
>> __asmeq("%1", "r0")
>
>
> Whoa, have you seen the compiler `optimise' the original assignments
> away? Since there is a use in the asm block, the definition shouldn't
> be omitted. What toolchain are you using?

Yes, I've seen the r0 and r1 assignments get optimized away. I
suspect it's because the mutex_lock() is between the assignments and
their use. I'm guessing the assignments to r0 and r1 are actually
generated, but then optimized away because mutex_lock() isn't inlined,
and thus assignments to r0 and r1 of values other than &scm_lock can
be "safely" removed (dead register assignments). I can't confirm any
of this, since I don't know what gcc is doing internally or even how
to probe what it's doing (yeah, I know I could go read the gcc
sources). Suggestions?

I've seen it with two different compilers so far:

gcc (Sourcery G++ Lite 2010.09-50) 4.5.1
arm-eabi-gcc (GCC) 4.4.0

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-26 05:09:13

by Saravana Kannan

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On 02/25/2011 05:23 AM, Will Deacon wrote:
> On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
>> Assign the registers used in the inline assembly immediately
>> before the inline assembly block. This ensures the compiler
>> doesn't optimize away dead register assignments when it
>> shouldn't.
>>
>> Signed-off-by: Stephen Boyd <[email protected]>
>> ---
>> arch/arm/mach-msm/scm.c | 7 +++++--
>> 1 files changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
>> index ba57b5a..5eddf54 100644
>> --- a/arch/arm/mach-msm/scm.c
>> +++ b/arch/arm/mach-msm/scm.c
>> @@ -264,13 +264,16 @@ u32 scm_get_version(void)
>> {
>> int context_id;
>> static u32 version = -1;
>> - register u32 r0 asm("r0") = 0x1 << 8;
>> - register u32 r1 asm("r1") = (u32)&context_id;
>> + register u32 r0 asm("r0");
>> + register u32 r1 asm("r1");
>>
>> if (version != -1)
>> return version;
>>
>> mutex_lock(&scm_lock);
>> +
>> + r0 = 0x1 << 8;
>> + r1 = (u32)&context_id;
>> asm volatile(
>> __asmeq("%0", "r1")
>> __asmeq("%1", "r0")
>
>
> Whoa, have you seen the compiler `optimise' the original assignments
> away? Since there is a use in the asm block, the definition shouldn't
> be omitted. What toolchain are you using?
>

Yeah, Stephen and I spent quite a bit of time discussing this and
experimenting to figure out what the heck GCC was doing. But it kept
optimizing the fake code we put in trying to force GCC to use a specific
register.

My hypothesis at this point is that the "register xx asm("rx")"
declarations are just for giving a symbolic name to refer to the
specific register in C code. They don't tell GCC to reserve the
register and make sure the value is preserved. And the assignments to
these variables seem to translate to a plain "mov rx, 5" kind of
instruction, with no further preservation of rx either.

That's the only hypothesis I/we could come up with as to how this got
optimized away.

It would be great if someone could explain the exact meaning of these
"register asm" declarations and the assignments in C code.

-Saravana

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-26 08:48:13

by Russell King - ARM Linux

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On Fri, Feb 25, 2011 at 09:09:05PM -0800, Saravana Kannan wrote:
> Yeah, Stephen and I spent quite a bit of time discussing this and
> experimenting to figure out what the heck GCC was doing. But it kept
> optimizing the fake code we put in trying to force GCC to use a specific
> register.

One way to look at it is that if you specify a value for r0, assign it,
and then call a function, how do you expect the r0 value to be preserved?
r0 will be corrupted by the called function, as it's used to pass arg0 and
the return value.

I'm surprised the compiler didn't spit out an error.

2011-02-26 17:58:55

by David Brown

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On Sat, Feb 26 2011, Russell King - ARM Linux wrote:

> On Fri, Feb 25, 2011 at 09:09:05PM -0800, Saravana Kannan wrote:
>> Yeah, Stephen and I spent quite a bit of time discussing this and
>> experimenting to figure out what the heck GCC was doing. But it kept
>> optimizing the fake code we put in trying to force GCC to use a specific
>> register.
>
> One way to look at it is that if you specify a value for r0, assign it,
> and then call a function, how do you expect the r0 value to be preserved?
> r0 will be corrupted by the called function, as it's used to pass arg0 and
> the return value.

> I'm surprised the compiler didn't spit out an error.

The gcc docs say:

* Local register variables in specific registers do not reserve the
registers, except at the point where they are used as input or
output operands in an `asm' statement and the `asm' statement
itself is not deleted. The compiler's data flow analysis is
capable of determining where the specified registers contain live
values, and where they are available for other uses. Stores into
local register variables may be deleted when they appear to be
dead according to dataflow analysis. References to local register
variables may be deleted or moved or simplified.

which would suggest that it should at least detect that it can't keep
the value in r0. What it seems to do is detect that the value can't be
in the register, so it never bothers putting it there in the first
place.

In any case, fortunately it works with the fix.

David

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-26 18:12:46

by David Brown

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On Fri, Feb 25 2011, Will Deacon wrote:

> On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
>> We don't want the compiler to remove these asm statements or
>> reorder them in any way. Mark them as volatile to be sure.
>>
>> Signed-off-by: Stephen Boyd <[email protected]>
>> ---
>> arch/arm/mach-msm/scm.c | 4 ++--
>> 1 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
>> index f4b9bc9..ba57b5a 100644
>> --- a/arch/arm/mach-msm/scm.c
>> +++ b/arch/arm/mach-msm/scm.c
>> @@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
>> register u32 r0 asm("r0") = 1;
>> register u32 r1 asm("r1") = (u32)&context_id;
>> register u32 r2 asm("r2") = cmd_addr;
>> - asm(
>> + asm volatile(
>> __asmeq("%0", "r0")
>> __asmeq("%1", "r0")
>> __asmeq("%2", "r1")
>> @@ -271,7 +271,7 @@ u32 scm_get_version(void)
>> return version;
>>
>> mutex_lock(&scm_lock);
>> - asm(
>> + asm volatile(
>> __asmeq("%0", "r1")
>> __asmeq("%1", "r0")
>> __asmeq("%2", "r1")
>
> These asm blocks all have sensible looking output constraints. Why
> do they need to be marked volatile?

Without the volatile, the compiler is free to assume the only side
effects of the asm are to modify the output registers. The volatile is
needed to indicate to the compiler that the asm has other side effects.
There isn't enough optimization, yet, in gcc to change the generated
code in this case, so it happens to generate the correct code without
it.

The second probably doesn't need it, unless we are expecting the version
to change dynamically. The volatile makes the scm_get_version()
function clearly a call to scm, though, so is probably useful to
document the intent.
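
As a contrived sketch of the failure mode (secure_noop is a made-up
example, not the driver code): without volatile, the asm's only declared
effect is producing its output, so if nothing reads that output the
whole block - smc included - is a legitimate candidate for deletion.

static inline u32 secure_noop(u32 arg)
{
        register u32 r0 asm("r0") = arg;

        /*
         * No volatile: if a caller ignores the return value, dataflow
         * analysis may delete this asm and the smc along with it.
         */
        asm("smc #0" : "=r" (r0) : "r" (r0) : "r1", "r2", "r3");
        return r0;
}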

David

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-26 19:43:19

by Nicolas Pitre

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On Sat, 26 Feb 2011, David Brown wrote:

> On Fri, Feb 25 2011, Will Deacon wrote:
>
> > On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
> >> We don't want the compiler to remove these asm statements or
> >> reorder them in any way. Mark them as volatile to be sure.
> >>
> >> Signed-off-by: Stephen Boyd <[email protected]>
> >> ---
> >> arch/arm/mach-msm/scm.c | 4 ++--
> >> 1 files changed, 2 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
> >> index f4b9bc9..ba57b5a 100644
> >> --- a/arch/arm/mach-msm/scm.c
> >> +++ b/arch/arm/mach-msm/scm.c
> >> @@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
> >> register u32 r0 asm("r0") = 1;
> >> register u32 r1 asm("r1") = (u32)&context_id;
> >> register u32 r2 asm("r2") = cmd_addr;
> >> - asm(
> >> + asm volatile(
> >> __asmeq("%0", "r0")
> >> __asmeq("%1", "r0")
> >> __asmeq("%2", "r1")
> >> @@ -271,7 +271,7 @@ u32 scm_get_version(void)
> >> return version;
> >>
> >> mutex_lock(&scm_lock);
> >> - asm(
> >> + asm volatile(
> >> __asmeq("%0", "r1")
> >> __asmeq("%1", "r0")
> >> __asmeq("%2", "r1")
> >
> > These asm blocks all have sensible looking output constraints. Why
> > do they need to be marked volatile?
>
> Without the volatile, the compiler is free to assume the only side
> effects of the asm are to modify the output registers. The volatile is
> needed to indicate to the compiler that the asm has other side effects.
> There isn't enough optimization, yet, in gcc to change the generated
> code in this case, so it happens to generate the correct code without
> it.
>
> The second probably doesn't need it, unless we are expecting the version
> to change dynamically. The volatile makes the scm_get_version()
> function clearly a call to scm, though, so is probably useful to
> document the intent.

If the inline asm does have side effects which are not obvious other
than producing a result for the output operand then it is a good idea to
add a comment to that effect. Otherwise it is always best to omit the
volatile and let gcc move the inline asm around or even delete it
entirely when possible.


Nicolas

2011-02-26 20:04:48

by Nicolas Pitre

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On Sat, 26 Feb 2011, David Brown wrote:

> On Sat, Feb 26 2011, Russell King - ARM Linux wrote:
>
> > On Fri, Feb 25, 2011 at 09:09:05PM -0800, Saravana Kannan wrote:
> > One way to look at it is that if you specify a value for r0, assign it,
> > and then call a function, how do you expect the r0 value to be preserved?
> > r0 will be corrupted by the called function, as it's used to pass arg0 and
> > the return value.
>
> > I'm surprised the compiler didn't spit out an error.

Me too. The compiler should have moved the content of r0 somewhere else
before the function call, and restored it back into r0 if necessary
before the point where the corresponding variable is used again. Or at
least issued a warning if it couldn't do that.

> The gcc docs say:
>
> * Local register variables in specific registers do not reserve the
> registers, except at the point where they are used as input or
> output operands in an `asm' statement and the `asm' statement
> itself is not deleted. The compiler's data flow analysis is
> capable of determining where the specified registers contain live
> values, and where they are available for other uses. Stores into
> local register variables may be deleted when they appear to be
> dead according to dataflow analysis. References to local register
> variables may be deleted or moved or simplified.
>
> which would suggest that it should at least detect that it can't keep
> the value in r0. What it seems to do is detect that the value can't be
> in the register, so it never bothers putting it there in the first
> place.

Right. A minimal test case may look like this if someone feels like
filing a gcc bug report:

extern int foo(int x);

int bar(int x)
{
        register int a asm("r0") = 1;
        x = foo(x);
        asm ("add %0, %1, %2" : "=r" (x) : "r" (a), "r" (x));
        return x;
}

And the produced code is:

bar:
        stmfd   sp!, {r3, lr}
        bl      foo
#APP
        add     r0, r0, r0
        ldmfd   sp!, {r3, pc}

So this is clearly bogus.
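
Correct output would have to rematerialize 'a' after the call, something
like this (a sketch only; the exact register choices are up to the
compiler):

bar:
        stmfd   sp!, {r3, lr}
        bl      foo
        mov     r3, r0          @ move x out of r0
        mov     r0, #1          @ reload 'a' into r0 for the asm
#APP
        add     r0, r0, r3
        ldmfd   sp!, {r3, pc}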

> In any case, fortunately it works with the fix.

Please add a comment in your patch to explain the issue.


Nicolas

2011-02-27 11:10:48

by Will Deacon

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On Sat, 2011-02-26 at 18:12 +0000, David Brown wrote:
> On Fri, Feb 25 2011, Will Deacon wrote:

> > These asm blocks all have sensible looking output constraints. Why
> > do they need to be marked volatile?
>
> Without the volatile, the compiler is free to assume the only side
> effects of the asm are to modify the output registers. The volatile is
> needed to indicate to the compiler that the asm has other side effects.

As far as I know, volatile asm does two things:

(1) It stops the compiler from reordering the asm block with respect to
other volatile statements.

(2) It prevents the compiler from optimising the block away when
dataflow analysis indicates it's not required.

If side-effects need to be indicated, won't a memory clobber do the
trick?

Will

2011-02-27 17:38:46

by David Brown

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On Sun, Feb 27 2011, Will Deacon wrote:

> On Sat, 2011-02-26 at 18:12 +0000, David Brown wrote:
>> On Fri, Feb 25 2011, Will Deacon wrote:
>
>> > These asm blocks all have sensible looking output constraints. Why
>> > do they need to be marked volatile?
>>
>> Without the volatile, the compiler is free to assume the only side
>> effects of the asm are to modify the output registers. The volatile is
>> needed to indicate to the compiler that the asm has other side effects.
>
> As far as I know, volatile asm does two things:
>
> (1) It stops the compiler from reordering the asm block with respect to
> other volatile statements.
>
> (2) It prevents the compiler from optimising the block away when
> dataflow analysis indicates it's not required.
>
> If side-effects need to be indicated, won't a memory clobber do the
> trick?

Per the gcc manual:

If your assembler instructions access memory in an unpredictable
fashion, add `memory' to the list of clobbered registers. This will
cause GCC to not keep memory values cached in registers across the
assembler instruction and not optimize stores or loads to that
memory. You will also want to add the `volatile' keyword if the
memory affected is not listed in the inputs or outputs of the `asm',
as the `memory' clobber does not count as a side-effect of the `asm'.
If you know how large the accessed memory is, you can add it as input
or output but if this is not known, you should add `memory'. As an
example, if you access ten bytes of a string, you can use a memory
input like:

The smc instruction is similar to a syscall. When in the secure world,
the processor is making state changes. It's not quite correct to
declare this as memory, because the memory used when secure isn't even
accessible to us. As far as I can tell, the volatile is the only way to
tell the compiler this.

David

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-27 17:41:32

by David Brown

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On Sat, Feb 26 2011, Nicolas Pitre wrote:

> On Sat, 26 Feb 2011, David Brown wrote:
>
>> On Fri, Feb 25 2011, Will Deacon wrote:
>>
>> > On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
>> >> We don't want the compiler to remove these asm statements or
>> >> reorder them in any way. Mark them as volatile to be sure.
>> >>
>> >> Signed-off-by: Stephen Boyd <[email protected]>
>> >> ---
>> >> arch/arm/mach-msm/scm.c | 4 ++--
>> >> 1 files changed, 2 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
>> >> index f4b9bc9..ba57b5a 100644
>> >> --- a/arch/arm/mach-msm/scm.c
>> >> +++ b/arch/arm/mach-msm/scm.c
>> >> @@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
>> >> register u32 r0 asm("r0") = 1;
>> >> register u32 r1 asm("r1") = (u32)&context_id;
>> >> register u32 r2 asm("r2") = cmd_addr;
>> >> - asm(
>> >> + asm volatile(
>> >> __asmeq("%0", "r0")
>> >> __asmeq("%1", "r0")
>> >> __asmeq("%2", "r1")
>> >> @@ -271,7 +271,7 @@ u32 scm_get_version(void)
>> >> return version;
>> >>
>> >> mutex_lock(&scm_lock);
>> >> - asm(
>> >> + asm volatile(
>> >> __asmeq("%0", "r1")
>> >> __asmeq("%1", "r0")
>> >> __asmeq("%2", "r1")
>> >
>> > These asm blocks all have sensible looking output constraints. Why
>> > do they need to be marked volatile?
>>
>> Without the volatile, the compiler is free to assume the only side
>> effects of the asm are to modify the output registers. The volatile is
>> needed to indicate to the compiler that the asm has other side effects.
>> There isn't enough optimization, yet, in gcc to change the generated
>> code in this case, so it happens to generate the correct code without
>> it.
>>
>> The second probably doesn't need it, unless we are expecting the version
>> to change dynamically. The volatile makes the scm_get_version()
>> function clearly a call to scm, though, so is probably useful to
>> document the intent.
>
> If the inline asm does have side effects which are not obvious other
> than producing a result for the output operand then it is a good idea to
> add a comment to that effect. Otherwise it is always best to omit the
> volatile and let gcc move the inline asm around or even delete it
> entirely when possible.

Would this be better as a comment by the assembly or for the whole file
or function? The entire purpose of this file is to communicate with
another logical processor, so it's all about producing side effects
other than just modifying the registers or the memory. Maybe a file
comment briefly explaining that SCM runs in TrustZone and a short
comment by each asm stating that it traps to the other logical cpu?

David

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-02-28 02:21:10

by Nicolas Pitre

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

On Sun, 27 Feb 2011, David Brown wrote:

> On Sat, Feb 26 2011, Nicolas Pitre wrote:
>
> > On Sat, 26 Feb 2011, David Brown wrote:
> >
> >> On Fri, Feb 25 2011, Will Deacon wrote:
> >>
> >> > On Thu, 2011-02-24 at 18:44 +0000, Stephen Boyd wrote:
> >> >> We don't want the compiler to remove these asm statements or
> >> >> reorder them in any way. Mark them as volatile to be sure.
> >> >>
> >> >> Signed-off-by: Stephen Boyd <[email protected]>
> >> >> ---
> >> >> arch/arm/mach-msm/scm.c | 4 ++--
> >> >> 1 files changed, 2 insertions(+), 2 deletions(-)
> >> >>
> >> >> diff --git a/arch/arm/mach-msm/scm.c b/arch/arm/mach-msm/scm.c
> >> >> index f4b9bc9..ba57b5a 100644
> >> >> --- a/arch/arm/mach-msm/scm.c
> >> >> +++ b/arch/arm/mach-msm/scm.c
> >> >> @@ -174,7 +174,7 @@ static u32 smc(u32 cmd_addr)
> >> >> register u32 r0 asm("r0") = 1;
> >> >> register u32 r1 asm("r1") = (u32)&context_id;
> >> >> register u32 r2 asm("r2") = cmd_addr;
> >> >> - asm(
> >> >> + asm volatile(
> >> >> __asmeq("%0", "r0")
> >> >> __asmeq("%1", "r0")
> >> >> __asmeq("%2", "r1")
> >> >> @@ -271,7 +271,7 @@ u32 scm_get_version(void)
> >> >> return version;
> >> >>
> >> >> mutex_lock(&scm_lock);
> >> >> - asm(
> >> >> + asm volatile(
> >> >> __asmeq("%0", "r1")
> >> >> __asmeq("%1", "r0")
> >> >> __asmeq("%2", "r1")
> >> >
> >> > These asm blocks all have sensible looking output constraints. Why
> >> > do they need to be marked volatile?
> >>
> >> Without the volatile, the compiler is free to assume the only side
> >> effects of the asm are to modify the output registers. The volatile is
> >> needed to indicate to the compiler that the asm has other side effects.
> >> There isn't enough optimization, yet, in gcc to change the generated
> >> code in this case, so it happens to generate the correct code without
> >> it.
> >>
> >> The second probably doesn't need it, unless we are expecting the version
> >> to change dynamically. The volatile makes the scm_get_version()
> >> function clearly a call to scm, though, so is probably useful to
> >> document the intent.
> >
> > If the inline asm does have side effects which are not obvious other
> > than producing a result for the output operand then it is a good idea to
> > add a comment to that effect. Otherwise it is always best to omit the
> > volatile and let gcc move the inline asm around or even delete it
> > entirely when possible.
>
> Would this be better as a comment by the assembly or for the whole file
> or function? The entire purpose of this file is to communicate with
> another logical processor, so it's all about producing side effects
> other than just modifying the registers or the memory. Maybe a file
> comment briefly explaining that SCM runs in TrustZone and a short
> comment by each asm stating that it traps to the other logical cpu?

Now that I've looked more closely at the actual code, I think it is
obvious enough that the volatile is needed and no extra comment should
be required.


Nicolas

2011-03-01 04:21:05

by Stephen Boyd

Subject: Re: [PATCH 4/4] msm: scm: Get cacheline size from CTR

On 02/24/2011 11:56 AM, Thomas Gleixner wrote:
> On Thu, 24 Feb 2011, Stephen Boyd wrote:
>
>>
>> I definitely don't want to do it for every loop. I'm fine with getting
>> it every scm_call() invocation though.
>>
>> For now, I'll pull the end and cacheline_size variables out of the
>> do-while loop.
>
> Why not do it correctly right away and retrieve it in an __init
> function?

That would require an early_initcall, so hopefully that is fine.
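
Roughly something like this sketch, where scm_cacheline_init and
scm_cacheline_size are placeholder names:

static u32 scm_cacheline_size;

static int __init scm_cacheline_init(void)
{
        u32 ctr;

        /* Read the CTR once at boot and cache the D-cache line size */
        asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
        scm_cacheline_size = 4 << ((ctr >> 16) & 0xf);
        return 0;
}
early_initcall(scm_cacheline_init);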

I wonder why the generic arm v7 cache operations don't do the same thing
and store the dcache line size somewhere. Every dma operation is
essentially calling dcache_line_size(). Perhaps some generic arm code
should be determining the dcache line size really early on and storing
it in the proc_info_list? Then both the dma code and scm code could
query the processor for the dcache line size with something like
cpu_dcache_line_size?

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-03-01 10:30:53

by Will Deacon

Subject: Re: [PATCH 1/4] msm: scm: Mark inline asm as volatile

Hi David,

On Sun, 2011-02-27 at 17:38 +0000, David Brown wrote:
> Per the gcc manual:
>
> If your assembler instructions access memory in an unpredictable
> fashion, add `memory' to the list of clobbered registers. This will
> cause GCC to not keep memory values cached in registers across the
> assembler instruction and not optimize stores or loads to that
> memory. You will also want to add the `volatile' keyword if the
> memory affected is not listed in the inputs or outputs of the `asm',
> as the `memory' clobber does not count as a side-effect of the `asm'.
> If you know how large the accessed memory is, you can add it as input
> or output but if this is not known, you should add `memory'. As an
> example, if you access ten bytes of a string, you can use a memory
> input like:
>
Right, so if you neglected to check the output from the smc block then
it would be a candidate for removal even with a memory clobber. Now I
see why you want a volatile in there!

For what it's worth:

Acked-by: Will Deacon <[email protected]>

Will

2011-03-01 10:37:35

by Will Deacon

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

Hi Nicolas,

On Sat, 2011-02-26 at 20:04 +0000, Nicolas Pitre wrote:
> > The gcc docs say:
> >
> > * Local register variables in specific registers do not reserve the
> > registers, except at the point where they are used as input or
> > output operands in an `asm' statement and the `asm' statement
> > itself is not deleted. The compiler's data flow analysis is
> > capable of determining where the specified registers contain live
> > values, and where they are available for other uses. Stores into
> > local register variables may be deleted when they appear to be
> > dead according to dataflow analysis. References to local register
> > variables may be deleted or moved or simplified.
> >
> > which would suggest that it should at least detect that it can't keep
> > the value in r0. What it seems to do is detect that the value can't be
> > in the register, so it never bothers putting it there in the first
> > place.

I suspect it sees the function call as a write to r0 and then somehow
infers that the live range of the int r0 variable ends there. Without a
use in the live range it then decides it can optimise away the
definition. It really comes down to whether or not the variable is
characterised by its identifier or the register in which it resides.

> Right. A minimal test case may look like this if someone feels like
> filing a gcc bug report:
>
> extern int foo(int x);
>
> int bar(int x)
> {
>         register int a asm("r0") = 1;
>         x = foo(x);
>         asm ("add %0, %1, %2" : "=r" (x) : "r" (a), "r" (x));
>         return x;
> }
>
> And the produced code is:
>
> bar:
>         stmfd   sp!, {r3, lr}
>         bl      foo
> #APP
>         add     r0, r0, r0
>         ldmfd   sp!, {r3, pc}
>
> So this is clearly bogus.
>

I agree that this is wrong, but the compiler people may try and argue
the other way. I'll ask some of the compiler guys at ARM and see what
they think.

> > In any case, fortunately it works with the fix.
>
> Please add a comment in your patch to explain the issue.
>

Perhaps a more robust fix would be to remove the register int
declarations and handle the parameter marshalling in the same asm block
that contains the smc?
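
Untested, but something along these lines, keeping the existing calling
convention while letting the compiler choose the operand registers (the
SCM_INTERRUPTED retry loop is omitted for brevity):

static u32 smc(u32 cmd_addr)
{
        int context_id;
        u32 ret;

        asm volatile(
                "mov    r0, #1\n"
                "mov    r1, %1\n"
                "mov    r2, %2\n"
                "smc    #0      @ switch to secure world\n"
                "mov    %0, r0\n"
                : "=r" (ret)
                : "r" (&context_id), "r" (cmd_addr)
                : "r0", "r1", "r2", "r3", "memory");

        return ret;
}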

Will

2011-03-01 13:54:21

by Will Deacon

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

Nicolas,

On Sat, 2011-02-26 at 20:04 +0000, Nicolas Pitre wrote:
> Right. A minimal test case may look like this if someone feels like
> filing a gcc bug report:
>
> extern int foo(int x);
>
> int bar(int x)
> {
>         register int a asm("r0") = 1;
>         x = foo(x);
>         asm ("add %0, %1, %2" : "=r" (x) : "r" (a), "r" (x));
>         return x;
> }
>
> And the produced code is:
>
> bar:
>         stmfd   sp!, {r3, lr}
>         bl      foo
> #APP
>         add     r0, r0, r0
>         ldmfd   sp!, {r3, pc}
>
> So this is clearly bogus.


I've had a chat with the compiler guys and they confirmed that this is a
known bug. There's a really hairy bug report here:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38815

It looks like the GCC stance will change in the future so that register
variables will only be guaranteed to live in the specified register
during asm blocks which use them. If the register is required elsewhere,
spill/reload code will be emitted as necessary. This might break some
weird and wonderful code (passing hidden operands to functions?) but I
don't think we rely on the current behaviour anywhere in the kernel.

Will

2011-03-01 21:29:18

by Saravana Kannan

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On 03/01/2011 02:37 AM, Will Deacon wrote:
> Hi Nicolas,
>
> On Sat, 2011-02-26 at 20:04 +0000, Nicolas Pitre wrote:
>> Right. A minimal test case may look like this if someone feels like
>> filing a gcc bug report:
>>
>> extern int foo(int x);
>>
>> int bar(int x)
>> {
>>         register int a asm("r0") = 1;
>>         x = foo(x);
>>         asm ("add %0, %1, %2" : "=r" (x) : "r" (a), "r" (x));
>>         return x;
>> }
>>
>> And the produced code is:
>>
>> bar:
>>         stmfd   sp!, {r3, lr}
>>         bl      foo
>> #APP
>>         add     r0, r0, r0
>>         ldmfd   sp!, {r3, pc}
>>
>> So this is clearly bogus.
>>
>
> I agree that this is wrong, but the compiler people may try and argue
> the other way. I'll ask some of the compiler guys at ARM and see what
> they think.

Nicolas and Will,

Thanks for the sample bug code and thanks for checking with the compiler
guys and validating (in another thread) that this is indeed a bug in
GCC. Glad to know we weren't doing something stupid.

>>> In any case, fortunately it works with the fix.
>>
>> Please add a comment in your patch to explain the issue.
>>
>
> Perhaps a more robust fix would be to remove the register int
> declarations and handle the parameter marshalling in the same asm block
> that contains the smc?

I was thinking the same, but the opposing idea I heard was that not
doing it inside the asm block would allow GCC to make better use of
the registers. We didn't have a strong opinion either way, so we went
with the implementation that was sent out.

Thanks,
Saravana
--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-03-02 00:02:30

by Nicolas Pitre

Subject: Re: [PATCH 2/4] msm: scm: Fix improper register assignment

On Tue, 1 Mar 2011, Saravana Kannan wrote:

> On 03/01/2011 02:37 AM, Will Deacon wrote:
> > Perhaps a more robust fix would be to remove the register int
> > declarations and handle the parameter marshalling in the same asm block
> > that contains the smc?
>
> I was thinking the same, but the opposing idea I heard was that not doing it
> inside the asm block would allow GCC to make better use of the registers.

Indeed. And a significant body of code out there does rely on this gcc
feature, so it has to minimally work.

> Didn't have a strong opinion either way, so we went with the implementation
> that was sent out.

ACK.


Nicolas

2011-03-09 19:29:10

by Stephen Boyd

Subject: Re: [PATCH 0/4] SCM fixes and updates

On 02/24/2011 10:44 AM, Stephen Boyd wrote:
> These are a few updates to SCM. The first two patches fix some
> bad code generation. The next patch saves a couple instructions
> on the slow path and the final patch determines the cacheline
> size dynamically instead of statically.
>
> Stephen Boyd (4):
> msm: scm: Mark inline asm as volatile
> msm: scm: Fix improper register assignment
> msm: scm: Check for interruption immediately
> msm: scm: Get cacheline size from CTR
>

Can we queue up patches 1 to 3 from this series for the next window? It
looks like everyone is ok with them. I'll respin the fourth patch once I
figure out where to go with it.

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

2011-03-10 20:06:54

by David Brown

Subject: Re: [PATCH 0/4] SCM fixes and updates

On Wed, Mar 09 2011, Stephen Boyd wrote:

> On 02/24/2011 10:44 AM, Stephen Boyd wrote:
>> These are a few updates to SCM. The first two patches fix some
>> bad code generation. The next patch saves a couple instructions
>> on the slow path and the final patch determines the cacheline
>> size dynamically instead of statically.
>>
>> Stephen Boyd (4):
>> msm: scm: Mark inline asm as volatile
>> msm: scm: Fix improper register assignment
>> msm: scm: Check for interruption immediately
>> msm: scm: Get cacheline size from CTR
>
> Can we queue up patches 1 to 3 from this series for the next window? It
> looks like everyone is ok with them. I'll respin the fourth patch once I
> figure out where to go with it.

Seems reasonable to me. I'll break the first three out and include them
in msm-next.

Stephen Boyd (3):
msm: scm: Mark inline asm as volatile
msm: scm: Fix improper register assignment
msm: scm: Check for interruption immediately

arch/arm/mach-msm/scm.c | 58 ++++++++++++++++++++++++++---------------------
1 files changed, 32 insertions(+), 26 deletions(-)

I have pulled these changes into my MSM tree, which can be
found at

git://codeaurora.org/quic/kernel/davidb/linux-msm.git

in the for-next branch

These patches will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week).

The patches will hopefully also be merged in Linus' tree for the next
-rc kernel release.

Thanks,
David

--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.