2006-05-09 08:54:52

by Chris Wright

[permalink] [raw]
Subject: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

Abstract the code that controls interrupt delivery, and add a separate
subarch implementation for Xen that manipulates a shared-memory event
delivery mask.

Signed-off-by: Ian Pratt <[email protected]>
Signed-off-by: Christian Limpach <[email protected]>
Signed-off-by: Chris Wright <[email protected]>
---
include/asm-i386/mach-default/mach_system.h | 24 ++++++
include/asm-i386/mach-xen/mach_system.h | 103 ++++++++++++++++++++++++++++
include/asm-i386/system.h | 20 -----
3 files changed, 128 insertions(+), 19 deletions(-)

--- linus-2.6.orig/include/asm-i386/system.h
+++ linus-2.6/include/asm-i386/system.h
@@ -490,25 +490,7 @@ static inline unsigned long long __cmpxc

#define set_wmb(var, value) do { var = value; wmb(); } while (0)

-/* interrupt control.. */
-#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt() __asm__ __volatile__("hlt": : :"memory")
-
-#define irqs_disabled() \
-({ \
- unsigned long flags; \
- local_save_flags(flags); \
- !(flags & (1<<9)); \
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#include <mach_system.h>

/*
* disable hlt during certain critical i/o operations
--- /dev/null
+++ linus-2.6/include/asm-i386/mach-default/mach_system.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_MACH_SYSTEM_H
+#define __ASM_MACH_SYSTEM_H
+
+/* interrupt control.. */
+#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
+#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
+#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
+#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt() __asm__ __volatile__("hlt": : :"memory")
+
+#define irqs_disabled() \
+({ \
+ unsigned long flags; \
+ local_save_flags(flags); \
+ !(flags & (1<<9)); \
+})
+
+/* For spinlocks etc */
+#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+
+#endif /* __ASM_MACH_SYSTEM_H */
--- /dev/null
+++ linus-2.6/include/asm-i386/mach-xen/mach_system.h
@@ -0,0 +1,103 @@
+#ifndef __ASM_MACH_SYSTEM_H
+#define __ASM_MACH_SYSTEM_H
+
+#ifdef __KERNEL__
+
+#include <asm/hypervisor.h>
+
+#ifdef CONFIG_SMP
+#define __vcpu_id smp_processor_id()
+#else
+#define __vcpu_id 0
+#endif
+
+/* interrupt control.. */
+
+/*
+ * The use of 'barrier' in the following reflects their use as local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
+ * critical operations are executed. All critical operations must complete
+ * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
+ * includes these barriers, for example.
+ */
+
+#define __cli() \
+do { \
+ struct vcpu_info *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
+} while (0)
+
+#define __sti() \
+do { \
+ struct vcpu_info *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
+ _vcpu->evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+} while (0)
+
+#define __save_flags(x) \
+do { \
+ struct vcpu_info *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ preempt_enable(); \
+} while (0)
+
+#define __restore_flags(x) \
+do { \
+ struct vcpu_info *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+ } else \
+ preempt_enable_no_resched(); \
+} while (0)
+
+#define safe_halt() ((void)0)
+#define halt() ((void)0)
+
+#define __save_and_cli(x) \
+do { \
+ struct vcpu_info *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
+} while (0)
+
+#define local_irq_save(x) __save_and_cli(x)
+#define local_irq_restore(x) __restore_flags(x)
+#define local_save_flags(x) __save_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+
+/* Cannot use preempt_enable() here as we would recurse in preempt_sched(). */
+#define irqs_disabled() \
+({ int ___x; \
+ struct vcpu_info *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
+ ___x = (_vcpu->evtchn_upcall_mask != 0); \
+ preempt_enable_no_resched(); \
+ ___x; })
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_MACH_SYSTEM_H */

--


2006-05-09 14:49:50

by Martin Bligh

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

> +/*
> + * The use of 'barrier' in the following reflects their use as local-lock
> + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
> + * critical operations are executed. All critical operations must complete
> + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
> + * includes these barriers, for example.
> + */

Seems like an odd comment to have in an i386 header file.

> +#define __cli() \
> +do { \
> + struct vcpu_info *_vcpu; \
> + preempt_disable(); \
> + _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> + _vcpu->evtchn_upcall_mask = 1; \
> + preempt_enable_no_resched(); \
> + barrier(); \
> +} while (0)

Should be a real function

> +#define __sti() \
> +do { \
> + struct vcpu_info *_vcpu; \
> + barrier(); \
> + preempt_disable(); \
> + _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> + _vcpu->evtchn_upcall_mask = 0; \
> + barrier(); /* unmask then check (avoid races) */ \
> + if (unlikely(_vcpu->evtchn_upcall_pending)) \
> + force_evtchn_callback(); \
> + preempt_enable(); \
> +} while (0)

Should be a real function

> +#define __save_flags(x) \
> +do { \
> + struct vcpu_info *_vcpu; \
> + preempt_disable(); \
> + _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> + (x) = _vcpu->evtchn_upcall_mask; \
> + preempt_enable(); \
> +} while (0)

Should be a real function

> +#define __restore_flags(x) \
> +do { \
> + struct vcpu_info *_vcpu; \
> + barrier(); \
> + preempt_disable(); \
> + _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> + if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
> + barrier(); /* unmask then check (avoid races) */ \
> + if (unlikely(_vcpu->evtchn_upcall_pending)) \
> + force_evtchn_callback(); \
> + preempt_enable(); \
> + } else \
> + preempt_enable_no_resched(); \
> +} while (0)

Should be a real function

> +#define safe_halt() ((void)0)
> +#define halt() ((void)0)
> +
> +#define __save_and_cli(x) \
> +do { \
> + struct vcpu_info *_vcpu; \
> + preempt_disable(); \
> + _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> + (x) = _vcpu->evtchn_upcall_mask; \
> + _vcpu->evtchn_upcall_mask = 1; \
> + preempt_enable_no_resched(); \
> + barrier(); \
> +} while (0)

Should be a real function

> +#define local_irq_save(x) __save_and_cli(x)
> +#define local_irq_restore(x) __restore_flags(x)
> +#define local_save_flags(x) __save_flags(x)
> +#define local_irq_disable() __cli()
> +#define local_irq_enable() __sti()
> +
> +/* Cannot use preempt_enable() here as we would recurse in preempt_sched(). */
> +#define irqs_disabled() \
> +({ int ___x; \
> + struct vcpu_info *_vcpu; \
> + preempt_disable(); \
> + _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> + ___x = (_vcpu->evtchn_upcall_mask != 0); \
> + preempt_enable_no_resched(); \
> + ___x; })
> +
> +#endif /* __KERNEL__ */

Should be a real function

2006-05-09 15:52:04

by Christian Limpach

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

On Tue, May 09, 2006 at 07:49:42AM -0700, Martin J. Bligh wrote:
> >+#define __cli() \
> >+do { \
> >+ struct vcpu_info *_vcpu; \
> >+ preempt_disable(); \
> >+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
> >+ _vcpu->evtchn_upcall_mask = 1; \
> >+ preempt_enable_no_resched(); \
> >+ barrier(); \
> >+} while (0)
>
> Should be a real function

Yes, except it's not trivially done because if __cli was an inline
function, you need to have everything that is used in the declaration
defined when the function is declared as opposed to when the #define
gets used. I'll give it another try, but it very quickly becomes
#include hell.

Anybody want to comment on the performance impact of making
local_irq_* non-inline functions?

christian

2006-05-09 16:02:19

by Martin Bligh

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

Christian Limpach wrote:
> On Tue, May 09, 2006 at 07:49:42AM -0700, Martin J. Bligh wrote:
>
>>>+#define __cli() \
>>>+do { \
>>>+ struct vcpu_info *_vcpu; \
>>>+ preempt_disable(); \
>>>+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
>>>+ _vcpu->evtchn_upcall_mask = 1; \
>>>+ preempt_enable_no_resched(); \
>>>+ barrier(); \
>>>+} while (0)
>>
>>Should be a real function
>
>
> Yes, except it's not trivially done because if __cli was an inline
> function, you need to have everything that is used in the declaration
> defined when the function is declared as opposed to when the #define
> gets used. I'll give it another try, but it very quickly becomes
> #include hell.
>
> Anybody want to comment on the performance impact of making
> local_irq_* non-inline functions?

I wasn't concerned with inline vs non-inline - that's your choice.
Just the inherent foulness of multi-line macros ;-)

M.


2006-05-09 16:08:18

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery


>
> Anybody want to comment on the performance impact of making
> local_irq_* non-inline functions?

I would guess for that much inline code it will be even a win to not
inline because it will save icache.

-Andi

2006-05-09 16:30:10

by Christian Limpach

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

On Tue, May 09, 2006 at 06:07:57PM +0200, Andi Kleen wrote:
>
> >
> > Anybody want to comment on the performance impact of making
> > local_irq_* non-inline functions?
>
> I would guess for that much inline code it will be even a win to not
> inline because it will save icache.

Maybe, although some of the macros compile down to only 2-3 instructions.

christian

2006-05-09 16:31:45

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

On Tuesday 09 May 2006 18:29, Christian Limpach wrote:
> On Tue, May 09, 2006 at 06:07:57PM +0200, Andi Kleen wrote:
> >
> > >
> > > Anybody want to comment on the performance impact of making
> > > local_irq_* non-inline functions?
> >
> > I would guess for that much inline code it will be even a win to not
> > inline because it will save icache.
>
> Maybe, although some of the macros compile down to only 2-3 instructions.

Can you post before/after vmlinux size numbers for inline/out of line?

-Andi

2006-05-09 19:23:26

by Nick Piggin

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

Martin J. Bligh wrote:
>> +/*
>> + * The use of 'barrier' in the following reflects their use as
>> local-lock
>> + * operations. Reentrancy must be prevented (e.g., __cli()) /before/
>> following
>> + * critical operations are executed. All critical operations must
>> complete
>> + * /before/ reentrancy is permitted (e.g., __sti()). Alpha
>> architecture also
>> + * includes these barriers, for example.
>> + */
>
>
> Seems like an odd comment to have in an i386 header file.

Also, it is only talking about compiler barriers, which have nothing
to do with the architecture.

And preempt_* macros should contain the correct compiler barriers, so
several can be removed.

--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com

2006-05-09 20:42:19

by Christian Limpach

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

On Tue, May 09, 2006 at 06:31:37PM +0200, Andi Kleen wrote:
> On Tuesday 09 May 2006 18:29, Christian Limpach wrote:
> > On Tue, May 09, 2006 at 06:07:57PM +0200, Andi Kleen wrote:
> > >
> > > >
> > > > Anybody want to comment on the performance impact of making
> > > > local_irq_* non-inline functions?
> > >
> > > I would guess for that much inline code it will be even a win to not
> > > inline because it will save icache.
> >
> > Maybe, although some of the macros compile down to only 2-3 instructions.
>
> Can you post before/after vmlinux size numbers for inline/out of line?

Sure, although it is a bit tricky since the #define's pass non-pointer
arguments by reference. This would also make it quite ugly to change
these.

Everything[1] in line:
-rwxr-xr-x 1 cl349 cl349 2633640 May 9 19:42 vmlinux-inline-stripped
Everything out of line:
-rwxr-xr-x 1 cl349 cl349 2621352 May 9 19:45 vmlinux-outline-stripped

Additionally, I did a build with only __sti and __restore_flags
out of line and the others in line:
-rwxr-xr-x 1 cl349 cl349 2617256 May 9 19:50 vmlinux-hybrid-stripped

__sti and __restore_flags are the ones which generate more code,
so it seemed more sensible to make them out of line.

Any conclusions?

christian

[1] __cli, __sti, __save_flags, __restore_flags, __save_and_cli, irqs_disabled

2006-05-09 21:53:54

by Chris Wright

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

* Christian Limpach ([email protected]) wrote:
> Everything[1] in line:
> -rwxr-xr-x 1 cl349 cl349 2633640 May 9 19:42 vmlinux-inline-stripped
> Everything out of line:
> -rwxr-xr-x 1 cl349 cl349 2621352 May 9 19:45 vmlinux-outline-stripped

Have the output of 'size vmlinux*' handy? Be nice to get the extra
details.

thanks,
-chris

2006-05-09 21:56:36

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery


> Everything[1] in line:
> -rwxr-xr-x 1 cl349 cl349 2633640 May 9 19:42 vmlinux-inline-stripped
> Everything out of line:
> -rwxr-xr-x 1 cl349 cl349 2621352 May 9 19:45 vmlinux-outline-stripped
>
> Additionally, I changed did a build with only __sti and __restore_flags
> out of line and the others in line:
> -rwxr-xr-x 1 cl349 cl349 2617256 May 9 19:50 vmlinux-hybrid-stripped
>
> __sti and __restore_flags are the ones which generate more code,
> so it seemed more sensible to make the out of line.
>
> Any conlusions?

It looks like hybrid is a clear winner at least from the code size, isn't it?

I doubt you will be able to benchmark the difference for anything else
anyways so might as well aim for that.

-Andi

2006-05-10 10:35:42

by Christian Limpach

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

On Tue, May 09, 2006 at 11:56:28PM +0200, Andi Kleen wrote:
>
> > Everything[1] in line:
> > -rwxr-xr-x 1 cl349 cl349 2633640 May 9 19:42 vmlinux-inline-stripped
> > Everything out of line:
> > -rwxr-xr-x 1 cl349 cl349 2621352 May 9 19:45 vmlinux-outline-stripped
> >
> > Additionally, I changed did a build with only __sti and __restore_flags
> > out of line and the others in line:
> > -rwxr-xr-x 1 cl349 cl349 2617256 May 9 19:50 vmlinux-hybrid-stripped
> >
> > __sti and __restore_flags are the ones which generate more code,
> > so it seemed more sensible to make the out of line.
> >
> > Any conlusions?
>
> It looks like hybrid is a clear winner at least from the code size, isn't it?

Yes, which is why I measured that one as well.

Now, the original concern was that we have the five operations implemented
as multi-line macros and doing a hybrid solution doesn't really address
that.

Also, it's not quite clear to me what's the best way to turn three of
the five into functions, whether inline or not.

For measuring the sizes, I did the following:
add void ___restore_flags(unsigned long *x) with the implementation
and then:
#define __restore_flags(x) ___restore_flags(&(x))

Alternatively, would it make sense to change __restore_flags to take
a pointer to flags instead? That would be quite an invasive change...

Any thoughts?

christian

2006-05-10 10:54:33

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC PATCH 15/35] subarch support for controlling interrupt delivery

> Yes, which is why I measured that one as well.
>
> Now, the original concern was that we have the five operations implemented
> as multi-line macros and doing a hybrid solution doesn't really address
> that.

If it's straight-forward to convert to an inline do it. If not keep
it as a macro. After all code style is just a tool, not something
self serving.

>
> Also, it's not quite clear to me what's the best way to turn three of
> the five into functions, whether inline or not.
>
> For measuring the sizes, I did the following:
> add void ___restore_flags(unsigned long *x) with the implementation
> and then:
> #define __restore_flags(x) ___restore_flags(&(x))

Yes that is the standard way to do it

> Alternatively, would it make sense to change __restore_flags to take
> a pointer to flags instead? That would be quite an invasive change...

No.

-Andi