2010-06-29 14:31:53

by Jan Beulich

Subject: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

Use the (alternative instructions based) callout hooks to the ticket
spinlock code to enlighten ticket locks when running fully virtualized
on Xen. Ultimately, this code might also be a candidate to be used
when running para-virtualized.
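
The hook points themselves are introduced by patch 1/4 and are not part
of this message; below is a minimal sketch of the interface assumed here,
with the two pointer names and signatures taken from the assignments at
the end of this patch and everything else an assumption:

/*
 * Sketch only: callout pointers installed by xen_set_cpu_features()
 * below. Patch 1/4 is assumed to invoke them from the ticket-lock
 * slow paths through alternative-instruction sites keyed on
 * X86_FEATURE_SPINLOCK_YIELD, so native execution pays no call overhead.
 */
extern void (*virt_spin_lock)(struct arch_spinlock *lock, unsigned int token);
extern void (*virt_spin_unlock)(struct arch_spinlock *lock, unsigned int token);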

Signed-off-by: Jan Beulich <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: KY Srinivasan <[email protected]>

---
arch/x86/include/asm/hypervisor.h     |    1 
arch/x86/include/asm/spinlock_types.h |   17 +-
arch/x86/include/asm/xen/cpuid.h      |   68 ++++++++
arch/x86/kernel/cpu/Makefile          |    2 
arch/x86/kernel/cpu/hypervisor.c      |    1 
arch/x86/kernel/cpu/xen.c             |  269 ++++++++++++++++++++++++++++++++++
6 files changed, 355 insertions(+), 3 deletions(-)

--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/hypervisor.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/hypervisor.h
@@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_
/* Recognized hypervisors */
extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen;

#endif
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock_types.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
# error "please don't include this file directly"
#endif

+#include <asm/types.h>
+
typedef struct arch_spinlock {
- unsigned int slock;
+ union {
+ unsigned int slock;
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+ struct {
+# if CONFIG_NR_CPUS < 256
+ u8 cur, seq;
+# else
+ u16 cur, seq;
+# endif
+ };
+#endif
+ };
} arch_spinlock_t;

-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }

typedef struct {
unsigned int lock;
--- /dev/null
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/xen/cpuid.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ * of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ * to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/Makefile
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)

obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o sched.o mshyperv.o
+obj-y += vmware.o xen.o hypervisor.o sched.o mshyperv.o

obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/hypervisor.c
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/hypervisor.c
@@ -43,6 +43,7 @@ static const __initconst struct hypervis
{
&x86_hyper_vmware,
&x86_hyper_ms_hyperv,
+ &x86_hyper_xen,
};

const struct hypervisor_x86 *x86_hyper;
--- /dev/null
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/xen.c
@@ -0,0 +1,269 @@
+#define __XEN_INTERFACE_VERSION__ 0x00030207
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stringify.h>
+#include <asm/sync_bitops.h>
+#include <asm/hypervisor.h>
+#include <asm/xen/cpuid.h>
+#include <asm/xen/hypercall.h>
+#include <xen/interface/event_channel.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/vcpu.h>
+
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+struct spinning {
+ struct arch_spinlock *lock;
+ unsigned int ticket;
+ struct spinning *prev;
+};
+
+static struct shared_info *__read_mostly xen_shared_info;
+EXPORT_SYMBOL_GPL(xen_shared_info);
+
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn);
+static DEFINE_PER_CPU(struct spinning *, _spinning);
+/*
+ * Protect removal of objects: Insertion can be done lockless, and even
+ * removal itself doesn't need protection - what needs to be prevented is
+ * removed objects going out of scope (as they're living on the stack).
+ */
+static DEFINE_PER_CPU(arch_rwlock_t, spinning_rm_lock) = __ARCH_RW_LOCK_UNLOCKED;
+
+static unsigned int __read_mostly spin_count = 1000;
+static int __init setup_spin_count(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ spin_count = simple_strtoul(s, &s, 0);
+ return !*s ? 0 : -EINVAL;
+}
+early_param("spin_count", setup_spin_count);
+
+#ifndef CONFIG_XEN
+__asm__(".pushsection .text, \"ax\", @progbits\n"
+ ".p2align " __stringify(PAGE_SHIFT) "\n"
+ "hypercall_page:\n"
+ ".skip 1 << " __stringify(PAGE_SHIFT) "\n"
+ ".popsection");
+#endif
+
+static void xen_set_cpu_features(struct cpuinfo_x86 *);
+
+static void xen_spin_lock(struct arch_spinlock *lock, unsigned int token)
+{
+ arch_rwlock_t *rm_lock;
+ unsigned long flags;
+ unsigned int count;
+ struct spinning spinning;
+
+ if (unlikely(percpu_read(runstate.state) != RUNSTATE_running))
+ xen_set_cpu_features(&__get_cpu_var(cpu_info));
+
+#if TICKET_SHIFT == 8
+ token >>= TICKET_SHIFT;
+#endif
+ spinning.ticket = token;
+ spinning.lock = lock;
+ spinning.prev = percpu_read(_spinning);
+ smp_wmb();
+ percpu_write(_spinning, &spinning);
+
+ sync_clear_bit(percpu_read(poll_evtchn),
+ xen_shared_info->evtchn_pending);
+
+ for (count = spin_count; ({ barrier(); lock->cur != token; }); )
+ if (likely(cpu_online(raw_smp_processor_id()))
+ && unlikely(!--count)) {
+ struct sched_poll sched_poll;
+
+ set_xen_guest_handle(sched_poll.ports,
+ &__get_cpu_var(poll_evtchn));
+ sched_poll.nr_ports = 1;
+ sched_poll.timeout = 0;
+ HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll);
+ count = spin_count;
+ } else
+ cpu_relax();
+
+ /*
+ * If we interrupted another spinlock while it was blocking, make
+ * sure it doesn't block (again) without re-checking the lock.
+ */
+ if (spinning.prev)
+ sync_set_bit(percpu_read(poll_evtchn),
+ xen_shared_info->evtchn_pending);
+
+ percpu_write(_spinning, spinning.prev);
+ rm_lock = &__get_cpu_var(spinning_rm_lock);
+ raw_local_irq_save(flags);
+ arch_write_lock(rm_lock);
+ arch_write_unlock(rm_lock);
+ raw_local_irq_restore(flags);
+}
+
+static void xen_spin_unlock(struct arch_spinlock *lock, unsigned int token)
+{
+ unsigned int cpu;
+
+ token &= (1U << TICKET_SHIFT) - 1;
+ for_each_online_cpu(cpu) {
+ arch_rwlock_t *rm_lock;
+ unsigned long flags;
+ struct spinning *spinning;
+
+ if (cpu == raw_smp_processor_id())
+ continue;
+
+ rm_lock = &per_cpu(spinning_rm_lock, cpu);
+ raw_local_irq_save(flags);
+ arch_read_lock(rm_lock);
+
+ spinning = per_cpu(_spinning, cpu);
+ smp_rmb();
+ if (spinning
+ && (spinning->lock != lock || spinning->ticket != token))
+ spinning = NULL;
+
+ arch_read_unlock(rm_lock);
+ raw_local_irq_restore(flags);
+
+ if (unlikely(spinning)) {
+ struct evtchn_send send;
+
+ send.port = per_cpu(poll_evtchn, cpu);
+ HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
+ return;
+ }
+ }
+}
+
+static void __init _prepare_shared_info_page(void)
+{
+ struct xen_add_to_physmap xatp;
+
+ xen_shared_info = slab_is_available()
+ ? (void *)get_zeroed_page(GFP_KERNEL)
+ : alloc_bootmem_pages(PAGE_SIZE);
+
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = __pa(xen_shared_info) >> PAGE_SHIFT;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+}
+
+static void __ref prepare_shared_info_page(void)
+{
+ _prepare_shared_info_page();
+}
+#endif
+
+static bool __cpuinit xen_platform(void)
+{
+ unsigned int first = XEN_CPUID_FIRST_LEAF;
+
+#if 0 /* So far, Xen sets this only for PV guests. */
+ if (!cpu_has_hypervisor)
+ return false;
+#endif
+
+ while (first < XEN_CPUID_LEAF(0x10000)) {
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(first, &eax, &ebx, &ecx, &edx);
+ if (ebx == XEN_CPUID_SIGNATURE_EBX
+ && ecx == XEN_CPUID_SIGNATURE_ECX
+ && edx == XEN_CPUID_SIGNATURE_EDX) {
+ if (!smp_processor_id()) {
+ cpuid(first + 1, &eax, &ebx, &ecx, &edx);
+ printk(KERN_INFO "Running on Xen %u.%u\n",
+ eax >> 16, eax & 0xffff);
+ }
+ return true;
+ }
+ first += 0x100;
+ }
+
+ return false;
+}
+
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+ unsigned int msr, eax, ebx, ecx, edx;
+ unsigned int first = XEN_CPUID_FIRST_LEAF;
+ int ret;
+ struct vcpu_register_runstate_memory_area vrrma;
+
+ if (num_possible_cpus() <= 1
+ || !spin_count
+ || (c != &boot_cpu_data
+ && !boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD)))
+ return;
+
+ while (first < XEN_CPUID_LEAF(0x10000)) {
+ cpuid(first, &eax, &ebx, &ecx, &edx);
+ if (ebx == XEN_CPUID_SIGNATURE_EBX
+ && ecx == XEN_CPUID_SIGNATURE_ECX
+ && edx == XEN_CPUID_SIGNATURE_EDX)
+ break;
+ first += 0x100;
+ }
+ BUG_ON(first >= XEN_CPUID_LEAF(0x10000));
+
+ cpuid(first + 2, &eax, &msr, &ecx, &edx);
+ BUG_ON(!eax);
+ wrmsrl(msr, __pa_symbol(hypercall_page));
+
+ if (!xen_shared_info)
+ prepare_shared_info_page();
+
+ memset(&vrrma, 0, sizeof(vrrma));
+ set_xen_guest_handle(vrrma.addr.h, &__get_cpu_var(runstate));
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
+ c->cpu_index, &vrrma);
+ if (ret) {
+ printk(KERN_WARNING
+ "Could not register runstate area for CPU%u: %d\n",
+ c->cpu_index, ret);
+ BUG_ON(boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD));
+ return;
+ }
+
+ if (c != &boot_cpu_data || !percpu_read(poll_evtchn)) {
+ struct evtchn_bind_ipi bind_ipi;
+
+ bind_ipi.vcpu = c->cpu_index;
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi);
+ if (ret) {
+ printk(KERN_WARNING
+ "Could not bind event channel for CPU%u: %d\n",
+ c->cpu_index, ret);
+ BUG_ON(boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD));
+ return;
+ }
+ sync_set_bit(bind_ipi.port, xen_shared_info->evtchn_mask);
+ percpu_write(poll_evtchn, bind_ipi.port);
+ printk(KERN_INFO "CPU%u spinlock poll event channel: %u\n",
+ c->cpu_index, bind_ipi.port);
+ }
+
+ virt_spin_lock = xen_spin_lock;
+ virt_spin_unlock = xen_spin_unlock;
+ set_cpu_cap(c, X86_FEATURE_SPINLOCK_YIELD);
+#endif
+}
+
+const __refconst struct hypervisor_x86 x86_hyper_xen = {
+ .name = "Xen",
+ .detect = xen_platform,
+ .set_cpu_features = xen_set_cpu_features
+};


2010-06-30 08:06:16

by Peter Zijlstra

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On Tue, 2010-06-29 at 15:32 +0100, Jan Beulich wrote:
> +static DEFINE_PER_CPU(arch_rwlock_t, spinning_rm_lock) = __ARCH_RW_LOCK_UNLOCKED;

why is that an arch_ lock?
why is that a rwlock?, those things are useless.

2010-06-30 08:51:37

by Jan Beulich

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

>>> On 30.06.10 at 10:05, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2010-06-29 at 15:32 +0100, Jan Beulich wrote:
>> +static DEFINE_PER_CPU(arch_rwlock_t, spinning_rm_lock) =
> __ARCH_RW_LOCK_UNLOCKED;
>
> why is that an arch_ lock?

Because I don't think it is appropriate to use anything higher level
in the callouts from the lock/unlock inline functions. The alternative
would be an open coded lock, which seems much less desirable to
me.

> why is that a rwlock?, those things are useless.

Because potentially each CPU's lock gets acquired for reading during
unlock, while only the locking CPU's one needs to be acquired for
writing during lock.
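
Distilled from the patch itself, the asymmetric pattern being described
is roughly the following (names as in the patch; sketch only):

	/* lock side, xen_spin_lock(): write-lock only this CPU's lock,
	 * purely to flush out readers before the on-stack entry dies */
	arch_write_lock(&__get_cpu_var(spinning_rm_lock));
	arch_write_unlock(&__get_cpu_var(spinning_rm_lock));

	/* unlock side, xen_spin_unlock(): read-lock each other CPU's
	 * lock while that CPU's _spinning entry is inspected */
	for_each_online_cpu(cpu) {
		arch_read_lock(&per_cpu(spinning_rm_lock, cpu));
		/* ... check per_cpu(_spinning, cpu) ... */
		arch_read_unlock(&per_cpu(spinning_rm_lock, cpu));
	}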

Jan

2010-06-30 08:56:36

by Peter Zijlstra

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On Wed, 2010-06-30 at 09:52 +0100, Jan Beulich wrote:
> > why is that a rwlock?, those things are useless.
>
> Because potentially each CPU's lock gets acquired for reading during
> unlock, while only the locking CPU's one needs to be acquired for
> writing during lock.

Can you say: scalability nightmare? But then it's Xen code so who cares..

/me pretends he never saw it

2010-06-30 09:03:54

by Jan Beulich

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

>>> On 30.06.10 at 10:56, Peter Zijlstra <[email protected]> wrote:
> On Wed, 2010-06-30 at 09:52 +0100, Jan Beulich wrote:
>> > why is that a rwlock?, those things are useless.
>>
>> Because potentially each CPU's lock gets acquired for reading during
>> unlock, while only the locking CPU's one needs to be acquired for
>> writing during lock.
>
> Can you say: scalability nightmare? But then it's Xen code so who cares..

Yes, this is a scalability problem. And yes, I'm open to suggestions.
But no, I didn't have any better idea myself. And for the time being
I don't foresee this being a problem, as VMs tend to have far fewer
CPUs than physical machines. And I think a performance win of 25%
justifies a sub-optimal implementation (with the prospect of people
smarter than me replacing it with a better one over time).

Jan

2010-06-30 10:07:45

by Jeremy Fitzhardinge

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On 06/29/2010 04:32 PM, Jan Beulich wrote:
> Use the (alternative instructions based) callout hooks to the ticket
> spinlock code to enlighten ticket locks when running fully virtualized
> on Xen. Ultimately, this code might also be a candidate to be used
> when running para-virtualized.
>

I'm not sure what the gain is by making this independent of all the rest
of the Xen support in the kernel. Stefano is working on a series
(posted a few times now) to add more paravirtual features for Xen HVM
guests, and this work is conceptually very similar.

Also, I'm not very keen on adding yet another kind of patching mechanism
to the kernel. While they're easy enough to get working in the first
place, they do tend to be fragile when other changes get introduced
(like changes to how the kernel is mapped RO/NX, etc), and this one will
be exercised relatively rarely. I don't see why the pvops mechanism
couldn't be reused, especially now that each set of ops can be
individually configured on/off.

This is especially acute in the case where you are using a full
pvops-using PV kernel, where it ends up using two mechanisms to put
paravirtualizations in place.


2010-06-30 11:30:38

by Jan Beulich

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

>>> On 30.06.10 at 12:07, Jeremy Fitzhardinge <[email protected]> wrote:
> On 06/29/2010 04:32 PM, Jan Beulich wrote:
>> Use the (alternative instructions based) callout hooks to the ticket
>> spinlock code to enlighten ticket locks when running fully virtualized
>> on Xen. Ultimately, this code might also be a candidate to be used
>> when running para-virtualized.
>>
>
> I'm not sure what the gain is by making this independent of all the rest
> of the Xen support in the kernel. Stefano is working on a series
> (posted a few times now) to add more paravirtual features for Xen HVM
> guests, and this work is conceptually very similar.

The intention really is for PARAVIRT_SPINLOCKS to go away as soon
as pv-ops Xen can be switched over to this mechanism.

> Also, I'm not very keen on adding yet another kind of patching mechanism
> to the kernel. While they're easy enough to get working in the first
> place, they do tend to be fragile when other changes get introduced
> (like changes to how the kernel is mapped RO/NX, etc), and this one will
> be exercised relatively rarely. I don't see why the pvops mechanism
> couldn't be reused, especially now that each set of ops can be
> individually configured on/off.

Wasn't the main complaint with using pvops patching that it
introduced extra calls into the native execution path? The point
of this "new" (it's not really new, it's using existing infrastructure)
mechanism is just to avoid such overhead for native.

> This is especially acute in the case where you are using a full
> pvops-using PV kernel, where it ends up using two mechanisms to put
> paravirtualizations in place.

And I see nothing wrong with this - if the individual pieces are
separate anyway, why shouldn't each of them use the most
efficient technique? Or if a single mechanism is desirable, shouldn't
one rather ask to convert the newer pvops patching mechanism
to the alternative instruction patching one, as that had been in
place long before?

Jan

2010-06-30 13:23:54

by Jeremy Fitzhardinge

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On 06/30/2010 01:31 PM, Jan Beulich wrote:
>>>> On 30.06.10 at 12:07, Jeremy Fitzhardinge <[email protected]> wrote:
>>>>
>> On 06/29/2010 04:32 PM, Jan Beulich wrote:
>>
>>> Use the (alternative instructions based) callout hooks to the ticket
>>> spinlock code to enlighten ticket locks when running fully virtualized
>>> on Xen. Ultimately, this code might also be a candidate to be used
>>> when running para-virtualized.
>>>
>>>
>> I'm not sure what the gain is by making this independent of all the rest
>> of the Xen support in the kernel. Stefano is working on a series
>> (posted a few times now) to add more paravirtual features for Xen HVM
>> guests, and this work is conceptually very similar.
>>
> The intention really is for PARAVIRT_SPINLOCKS to go away as soon
> as pv-ops Xen can be switched over to this mechanism.
>

I don't see the point of having two distinct implementations of
paravirtualization, especially since they have similar mechanisms
(patching, etc).

>> Also, I'm not very keen on adding yet another kind of patching mechanism
>> to the kernel. While they're easy enough to get working in the first
>> place, they do tend to be fragile when other changes get introduced
>> (like changes to how the kernel is mapped RO/NX, etc), and this one will
>> be exercised relatively rarely. I don't see why the pvops mechanism
>> couldn't be reused, especially now that each set of ops can be
>> individually configured on/off.
>>
> Wasn't the main complaint with using pvops patching that it
> introduced extra calls into the native execution path? The point
> of this "new" (it's not really new, it's using existing infrastructure)
> mechanism is just to avoid such overhead for native.
>

When a particular class of pv calls is enabled in the config file,
their baseline overhead amounts to a 6-byte nop. When in use, they are
a direct call (or <= 6 bytes of inlined instruction). It's possible to
add more padding space if it's important to have larger inlined sequences.

For spinlocks, the pvop calls should only be in the slow case: when a
spinlock has been spinning for long enough, and on unlock when there's
someone waiting for the lock. The fastpath (uncontended lock and
unlock) should have no extra calls.

So I don't think pvops overhead is really an issue here. Certainly I
don't think its worth prematurely optimising for.

>> This is especially acute in the case where you are using a full
>> pvops-using PV kernel, where it ends up using two mechanisms to put
>> paravirtualizations in place.
>>
> And I see nothing wrong with this - if the individual pieces are
> separate anyway, why shouldn't each of them use the most
> efficient technique?

Pluralitas non est ponenda sine necessitate.

Each of them doesn't need the most efficient technique, as that just
multiplies the number of different mechanisms which need to be
maintained. New mechanisms should only be introduced if one of the
existing ones is really, clearly, deficient.

> Or if a single mechanism is desirable, shouldn't
> one rather ask to convert the newer pvops patching mechanism
> to the alternative instruction patching one, as that had been in
> place long before?

pvops is a superset of alternative instruction patching, and the two are
really designed to serve different purposes. There are some areas in which
there's some overlap, but otherwise they are distinct. In particular,
alternative instructions are really only useful if you can express the
patch in terms of the presence or absence of a particular cpu feature.
It can't do multi-way choice, and it can't do anything other than insert
literal instructions. pvops patching can do multi-way, and has a
higher-level view of each patch site which allows it to do things like
generate appropriate save/restores, make inline vs call decisions, nop
out nop callsites, etc.

J

2010-06-30 14:03:10

by Jan Beulich

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

>>> On 30.06.10 at 15:23, Jeremy Fitzhardinge <[email protected]> wrote:
> For spinlocks, the pvop calls should only be in the slow case: when a
> spinlock has been spinning for long enough, and on unlock when there's
> someone waiting for the lock. The fastpath (no contention lock and
> unlock) should have no extra calls.

Then what was all that performance regression noise concerning
pvops spinlocks about, leading to CONFIG_PARAVIRT_SPINLOCKS
being separated from the base CONFIG_PARAVIRT?

Afaics the unlock still involves a function call *in all cases* with
pvops spinlocks, whereas it's a single inline instruction without.

Jan

2010-06-30 14:25:28

by Jeremy Fitzhardinge

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On 06/30/2010 04:03 PM, Jan Beulich wrote:
>>>> On 30.06.10 at 15:23, Jeremy Fitzhardinge <[email protected]> wrote:
>>>>
>> For spinlocks, the pvop calls should only be in the slow case: when a
>> spinlock has been spinning for long enough, and on unlock when there's
>> someone waiting for the lock. The fastpath (no contention lock and
>> unlock) should have no extra calls.
>>
> Then what was all that performance regression noise concerning
> pvops spinlocks about, leading to CONFIG_PARAVIRT_SPINLOCKS
> being separated from the base CONFIG_PARAVIRT?
>

Nobody knows. The pv spinlocks appeared to cause a 5% performance
regression on some benchmarks, which is wildly huge. It appears to
trigger some kind of microarchitectural catastrophe on some Intel cpus,
perhaps relating to the extra call in the path or something.

> Afaics the unlock still involves a function call *in all cases* with
> pvops spinlocks, whereas it's a single inline instruction without.
>

No. The unlock path can see if there are any further waiters by looking
at the ticket in the lock, and only does the kick call if there are some.
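
A minimal sketch of that shape, reusing the cur/seq layout from the patch
in this thread; the kick helper is hypothetical, not code from any posted
series, and memory-ordering subtleties are glossed over:

static __always_inline void ticket_unlock(arch_spinlock_t *lock)
{
	lock->cur++;			/* hand the lock to the next ticket */
	barrier();
	if (lock->seq != lock->cur)	/* another ticket is outstanding */
		pv_kick_waiter(lock, lock->cur);	/* hypothetical slow-path call */
}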

J

2010-06-30 14:36:12

by Jan Beulich

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

>>> On 30.06.10 at 16:25, Jeremy Fitzhardinge <[email protected]> wrote:
> On 06/30/2010 04:03 PM, Jan Beulich wrote:
>> Afaics the unlock still involves a function call *in all cases* with
>> pvops spinlocks, whereas it's a single inline instruction without.
>>
>
> No. The unlock path can see if there are any further waiters by looking
> at the ticket in the, and only do the kick call if there are some.

Are we perhaps talking about different things? I'm referring to

static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
{
PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}

which is an indirect call which, as I understand it, gets replaced
with a direct one at runtime. But it remains a call (as opposed
to a single inc instruction without CONFIG_PARAVIRT_SPINLOCKS).
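
For reference, the non-pvops unlock being compared against is essentially
the following (sketch from memory of the 2.6.35 code, NR_CPUS < 256 case):

static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
		     : "+m" (lock->slock)
		     :
		     : "memory", "cc");
}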

Jan

2010-06-30 14:42:24

by Jeremy Fitzhardinge

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On 06/30/2010 04:36 PM, Jan Beulich wrote:
> Are we perhaps talking about different things? I'm referring to
>
> static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
> {
> PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
> }
>
> which is an indirect call which, as I understand it, gets replaced
> with a direct one at runtime. But it remains to be a call (as opposed
> to being a single inc instructions without CONFIG_PARAVIRT_SPINLOCKS).
>

Sorry, I'm referring to pv ticketlocks, not the current
PARAVIRT_SPINLOCKS code. I agree the current PARAVIRT_SPINLOCKS
implementation is suboptimal and needs to be replaced with something
that's only called on the slow path. I just think the existing
paravirt_ops mechanism can be used to implement it rather than adding
something new.

J

2010-06-30 15:57:24

by Stefano Stabellini

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On Wed, 30 Jun 2010, Jeremy Fitzhardinge wrote:
> On 06/29/2010 04:32 PM, Jan Beulich wrote:
> > Use the (alternative instructions based) callout hooks to the ticket
> > spinlock code to enlighten ticket locks when running fully virtualized
> > on Xen. Ultimately, this code might also be a candidate to be used
> > when running para-virtualized.
> >
>
> I'm not sure what the gain is by making this independent of all the rest
> of the Xen support in the kernel. Stefano is working on a series
> (posted a few times now) to add more paravirtual features for Xen HVM
> guests, and this work is conceptually very similar.

My series has been stable for a while now and contains all the basic
PV on HVM mechanisms you need, including parsing the cpuid and mapping
the shared info page.

Could you please rebase on my series (or at least the first two
patches), so that we don't conflict with each other?

Thanks,

Stefano

2010-06-30 22:14:29

by H. Peter Anvin

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On 06/30/2010 06:23 AM, Jeremy Fitzhardinge wrote:
>
> pvops is a superset of alternative instruction patching, and are really
> designed to serve different purposes. There are some areas in which
> there's some overlap, but otherwise they are distinct. In particular,
> alternative instructions are really only useful if you can express the
> patch in terms of the presence or absence of a particular cpu feature.
> It can't do multi-way choice, and it can't do anything other than insert
> literal instructions. pvops patching can do multi-way, and has a
> higher-level view of each patch site which allows it to do things like
> generate appropriate save/restores, make inline vs call decisions, nop
> out nop callsites, etc.
>

A lot of this -- in particular the multiway -- is a defect in the
alternatives implementation and should have been addressed as such. One
of the biggest problems with pvops as it currently stands is that it is
monolithic; in general we have this class of problems (static selection)
in a *lot* more places than we're dealing with right now, and as such,
generalizing *something* -- be it pvops or alternatives -- would be useful.

gcc 4.5 also includes a very powerful facility called "asm goto", which
I have already used to implement static_cpu_has(). Again, that
particular construct (unlike "asm goto" itself) doesn't support multiway.
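
Simplified illustration of the construct (not the actual static_cpu_has()
implementation): "asm goto" lets inline assembly jump to a C label, so the
generated code can be a patchable jump with no run-time flag test left behind.

static __always_inline bool feature_enabled(void)
{
	/* starts out as a jump to the "no" label; assumed to be patched
	 * to NOPs once the feature is known to be present */
	asm goto("jmp %l[no]" : : : : no);
	return true;
no:
	return false;
}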

-hpa

2010-07-01 07:56:58

by Jan Beulich

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

>>> On 30.06.10 at 17:57, Stefano Stabellini <[email protected]>
wrote:
> On Wed, 30 Jun 2010, Jeremy Fitzhardinge wrote:
>> On 06/29/2010 04:32 PM, Jan Beulich wrote:
>> > Use the (alternative instructions based) callout hooks to the ticket
>> > spinlock code to enlighten ticket locks when running fully virtualized
>> > on Xen. Ultimately, this code might also be a candidate to be used
>> > when running para-virtualized.
>> >
>>
>> I'm not sure what the gain is by making this independent of all the rest
>> of the Xen support in the kernel. Stefano is working on a series
>> (posted a few times now) to add more paravirtual features for Xen HVM
>> guests, and this work is conceptually very similar.
>
> My series has been stable since a while now and contains all the basic
> PV on HVM mechanisms you need, including parsing the cpuid and mapping
> the shared info page.
>
> Could you please rebase on my series (or at least the first two
> patches), so that we don't conflict with each other?

I really don't want to make those patches depend on non-upstream
stuff (as I want them accepted upstream), and I'm not sure when your
patch series is expected to be upstream. If that's going to be soon,
I'd just re-base after it happened.

Jan

2010-07-01 11:39:12

by Stefano Stabellini

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On Thu, 1 Jul 2010, Jan Beulich wrote:
> >>> On 30.06.10 at 17:57, Stefano Stabellini <[email protected]>
> wrote:
> > On Wed, 30 Jun 2010, Jeremy Fitzhardinge wrote:
> >> On 06/29/2010 04:32 PM, Jan Beulich wrote:
> >> > Use the (alternative instructions based) callout hooks to the ticket
> >> > spinlock code to enlighten ticket locks when running fully virtualized
> >> > on Xen. Ultimately, this code might also be a candidate to be used
> >> > when running para-virtualized.
> >> >
> >>
> >> I'm not sure what the gain is by making this independent of all the rest
> >> of the Xen support in the kernel. Stefano is working on a series
> >> (posted a few times now) to add more paravirtual features for Xen HVM
> >> guests, and this work is conceptually very similar.
> >
> > My series has been stable since a while now and contains all the basic
> > PV on HVM mechanisms you need, including parsing the cpuid and mapping
> > the shared info page.
> >
> > Could you please rebase on my series (or at least the first two
> > patches), so that we don't conflict with each other?
>
> I really don't want to make those patches depend on non-upstream
> stuff (as I want them accepted upstream), and I'm not sure when your
> patch series is expected to be upstream. If that's going to be soon,
> I'd just re-base after it happened.

Given that the patch series has been tested and reviewed several times
by now, I don't expect big changes any more.
Therefore I hope it will be accepted as soon as possible (keeping in
mind that we are in RC3 right now).

2010-07-05 23:12:46

by Jeremy Fitzhardinge

Subject: Re: [PATCH 2/4, v2] x86: enlightenment for ticket spin locks - Xen implementation

On 06/30/2010 03:14 PM, H. Peter Anvin wrote:
> On 06/30/2010 06:23 AM, Jeremy Fitzhardinge wrote:
>
>> pvops is a superset of alternative instruction patching, and are really
>> designed to serve different purposes. There are some areas in which
>> there's some overlap, but otherwise they are distinct. In particular,
>> alternative instructions are really only useful if you can express the
>> patch in terms of the presence or absence of a particular cpu feature.
>> It can't do multi-way choice, and it can't do anything other than insert
>> literal instructions. pvops patching can do multi-way, and has a
>> higher-level view of each patch site which allows it to do things like
>> generate appropriate save/restores, make inline vs call decisions, nop
>> out nop callsites, etc.
>>
>>
> A lot of this -- in particular the multiway -- is a defect in the
> alternatives implementation and should have been addressed as such.

The pvops and asm alternatives have different design goals. Asm
alternatives is all about replacing particular instructions with
different ones depending on the properties of the CPU. pvops is about
inserting an ops vector in front of various OS-level interfaces to allow
alternate implementations; the patching part was a later optimisation
sprinkled on top to reduce the abstraction overhead. The fact that
there's some similarity in mechanism is the result of convergence rather
than a desire to reinvent.

> One
> of the biggest problems with pvops as it currently stands is that it is
> monolithic; in general we have this class of problems (static selection)
> in a *lot* more places than we're dealing with right now, and as such,
> generalizing *something* -- be it pvops or alternatives -- would be useful.
>

Yes. The module system might also be a candidate for making more
general (by allowing the kernel to have unbound references, and the
module system can be used to create a runtime binding). Modules are
already arch neutral (the arch-specific bits), which is an improvement
over alternatives or pvops, but it has no notion of patching anything
beyond linker relocs (but aside from very hot-path pvops such as
interrupt enable/disable, instruction patching isn't used all that much).

> gcc 4.5 also includes a very powerful facility called "asm goto", which
> I have already used to implement static_cpu_has(). Again, that
> particular construct (unlike "asm goto" itself) doesn't support multiway.
>

What does the fallback for older compilers look like?

J