Hi all,
this patch series introduces stolen ticks accounting for Xen on ARM.
Stolen ticks are clocksource ticks that have been "stolen" from the cpu,
typically because Linux is running in a virtual machine and the vcpu has
been descheduled.
To account for these ticks we introduce CONFIG_PARAVIRT and pv_time_ops
so that we can make use of:
kernel/sched/cputime.c:steal_account_process_tick
Stefano Stabellini (4):
xen: move xen_setup_runstate_info and get_runstate_snapshot to drivers/xen/time.c
kernel: missing include in cputime.c
arm: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops
xen/arm: account for stolen ticks
arch/arm/Kconfig | 20 +++++++++
arch/arm/include/asm/paravirt.h | 19 ++++++++
arch/arm/kernel/Makefile | 1 +
arch/arm/kernel/paravirt.c | 25 +++++++++++
arch/arm/xen/enlighten.c | 21 +++++++++
arch/ia64/xen/time.c | 48 +++------------------
arch/x86/xen/time.c | 76 +--------------------------------
drivers/xen/Makefile | 2 +-
drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++
include/xen/xen-ops.h | 5 ++
kernel/sched/cputime.c | 1 +
11 files changed, 191 insertions(+), 118 deletions(-)
create mode 100644 arch/arm/include/asm/paravirt.h
create mode 100644 arch/arm/kernel/paravirt.c
create mode 100644 drivers/xen/time.c
git://git.kernel.org/pub/scm/linux/kernel/git/sstabellini/xen.git lost_ticks_4
Cheers,
Stefano
Signed-off-by: Stefano Stabellini <[email protected]>
CC: [email protected]
CC: [email protected]
---
kernel/sched/cputime.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index cc2dc3ee..a5d1a9b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -5,6 +5,7 @@
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
+#include <asm/paravirt.h>
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
--
1.7.2.5
Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Ian Campbell <[email protected]>
CC: [email protected]
Changes in v2:
- leave do_stolen_accounting in arch/x86/xen/time.c;
- use the new common functions in arch/ia64/xen/time.c.
---
arch/ia64/xen/time.c | 48 +++----------------------
arch/x86/xen/time.c | 76 +----------------------------------------
drivers/xen/Makefile | 2 +-
drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
include/xen/xen-ops.h | 5 +++
5 files changed, 104 insertions(+), 118 deletions(-)
create mode 100644 drivers/xen/time.c
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index 1f8244a..79a0b8c 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -34,53 +34,17 @@
#include "../kernel/fsyscall_gtod_data.h"
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
/* taken from i386/kernel/time-xen.c */
static void xen_init_missing_ticks_accounting(int cpu)
{
- struct vcpu_register_runstate_memory_area area;
- struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
- int rc;
+ xen_setup_runstate_info(&runstate);
- memset(runstate, 0, sizeof(*runstate));
-
- area.addr.v = runstate;
- rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
- &area);
- WARN_ON(rc && rc != -ENOSYS);
-
- per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
- per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
- + runstate->time[RUNSTATE_offline];
-}
-
-/*
- * Runstate accounting
- */
-/* stolen from arch/x86/xen/time.c */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
- u64 state_time;
- struct vcpu_runstate_info *state;
-
- BUG_ON(preemptible());
-
- state = &__get_cpu_var(xen_runstate);
-
- /*
- * The runstate info is always updated by the hypervisor on
- * the current CPU, so there's no need to use anything
- * stronger than a compiler barrier when fetching it.
- */
- do {
- state_time = state->state_entry_time;
- rmb();
- *res = *state;
- rmb();
- } while (state->state_entry_time != state_time);
+ per_cpu(xen_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
+ per_cpu(xen_stolen_time, cpu) = runstate.time[RUNSTATE_runnable]
+ + runstate.time[RUNSTATE_offline];
}
#define NS_PER_TICK (1000000000LL/HZ)
@@ -94,7 +58,7 @@ consider_steal_time(unsigned long new_itm)
struct vcpu_runstate_info runstate;
struct task_struct *p = current;
- get_runstate_snapshot(&runstate);
+ xen_get_runstate_snapshot(&runstate);
/*
* Check for vcpu migration effect
@@ -202,7 +166,7 @@ static unsigned long long xen_sched_clock(void)
*/
now = ia64_native_sched_clock();
- get_runstate_snapshot(&runstate);
+ xen_get_runstate_snapshot(&runstate);
WARN_ON(runstate.state != RUNSTATE_running);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 3d88bfd..c0ca15e 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -30,9 +30,6 @@
#define TIMER_SLOP 100000
#define NS_PER_TICK (1000000000LL / HZ)
-/* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
-
/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
@@ -40,77 +37,6 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
static DEFINE_PER_CPU(u64, xen_residual_stolen);
static DEFINE_PER_CPU(u64, xen_residual_blocked);
-/* return an consistent snapshot of 64-bit time/counter value */
-static u64 get64(const u64 *p)
-{
- u64 ret;
-
- if (BITS_PER_LONG < 64) {
- u32 *p32 = (u32 *)p;
- u32 h, l;
-
- /*
- * Read high then low, and then make sure high is
- * still the same; this will only loop if low wraps
- * and carries into high.
- * XXX some clean way to make this endian-proof?
- */
- do {
- h = p32[1];
- barrier();
- l = p32[0];
- barrier();
- } while (p32[1] != h);
-
- ret = (((u64)h) << 32) | l;
- } else
- ret = *p;
-
- return ret;
-}
-
-/*
- * Runstate accounting
- */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
- u64 state_time;
- struct vcpu_runstate_info *state;
-
- BUG_ON(preemptible());
-
- state = &__get_cpu_var(xen_runstate);
-
- /*
- * The runstate info is always updated by the hypervisor on
- * the current CPU, so there's no need to use anything
- * stronger than a compiler barrier when fetching it.
- */
- do {
- state_time = get64(&state->state_entry_time);
- barrier();
- *res = *state;
- barrier();
- } while (get64(&state->state_entry_time) != state_time);
-}
-
-/* return true when a vcpu could run but has no real cpu to run on */
-bool xen_vcpu_stolen(int vcpu)
-{
- return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
-}
-
-void xen_setup_runstate_info(int cpu)
-{
- struct vcpu_register_runstate_memory_area area;
-
- area.addr.v = &per_cpu(xen_runstate, cpu);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
- cpu, &area))
- BUG();
-}
-
static void do_stolen_accounting(void)
{
struct vcpu_runstate_info state;
@@ -118,7 +44,7 @@ static void do_stolen_accounting(void)
s64 blocked, runnable, offline, stolen;
cputime_t ticks;
- get_runstate_snapshot(&state);
+ xen_get_runstate_snapshot(&state);
WARN_ON(state.state != RUNSTATE_running);
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index eabd0ee..2bf461a 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -3,7 +3,7 @@ obj-y += manage.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o events.o balloon.o
+obj-y += grant-table.o features.o events.o balloon.o time.o
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
new file mode 100644
index 0000000..c2e39d3
--- /dev/null
+++ b/drivers/xen/time.c
@@ -0,0 +1,91 @@
+/*
+ * Xen stolen ticks accounting.
+ */
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/math64.h>
+#include <linux/gfp.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/events.h>
+#include <xen/features.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/xen-ops.h>
+
+/* runstate info updated by Xen */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
+
+/* return an consistent snapshot of 64-bit time/counter value */
+static u64 get64(const u64 *p)
+{
+ u64 ret;
+
+ if (BITS_PER_LONG < 64) {
+ u32 *p32 = (u32 *)p;
+ u32 h, l;
+
+ /*
+ * Read high then low, and then make sure high is
+ * still the same; this will only loop if low wraps
+ * and carries into high.
+ * XXX some clean way to make this endian-proof?
+ */
+ do {
+ h = p32[1];
+ barrier();
+ l = p32[0];
+ barrier();
+ } while (p32[1] != h);
+
+ ret = (((u64)h) << 32) | l;
+ } else
+ ret = *p;
+
+ return ret;
+}
+
+/*
+ * Runstate accounting
+ */
+void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+ u64 state_time;
+ struct vcpu_runstate_info *state;
+
+ BUG_ON(preemptible());
+
+ state = &__get_cpu_var(xen_runstate);
+
+ /*
+ * The runstate info is always updated by the hypervisor on
+ * the current CPU, so there's no need to use anything
+ * stronger than a compiler barrier when fetching it.
+ */
+ do {
+ state_time = get64(&state->state_entry_time);
+ barrier();
+ *res = *state;
+ barrier();
+ } while (get64(&state->state_entry_time) != state_time);
+}
+
+/* return true when a vcpu could run but has no real cpu to run on */
+bool xen_vcpu_stolen(int vcpu)
+{
+ return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
+}
+
+void xen_setup_runstate_info(int cpu)
+{
+ struct vcpu_register_runstate_memory_area area;
+
+ area.addr.v = &per_cpu(xen_runstate, cpu);
+
+ if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
+ cpu, &area))
+ BUG();
+}
+
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index d6fe062..4fd4e47 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -3,6 +3,7 @@
#include <linux/percpu.h>
#include <asm/xen/interface.h>
+#include <xen/interface/vcpu.h>
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
@@ -16,6 +17,10 @@ void xen_mm_unpin_all(void);
void xen_timer_resume(void);
void xen_arch_resume(void);
+bool xen_vcpu_stolen(int vcpu);
+void xen_setup_runstate_info(int cpu);
+void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
+
int xen_setup_shutdown_event(void);
extern unsigned long *xen_contiguous_bitmap;
--
1.7.2.5
Introduce CONFIG_PARAVIRT and PARAVIRT_TIME_ACCOUNTING on ARM.
The only paravirt interface supported is pv_time_ops.steal_clock, so no
runtime pvops patching needed.
This allows us to make use of steal_account_process_tick for stolen
ticks accounting.
Signed-off-by: Stefano Stabellini <[email protected]>
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
Changes in v3:
- improve commit description and Kconfig help text;
- no need to initialize pv_time_ops;
- add PARAVIRT_TIME_ACCOUNTING.
---
arch/arm/Kconfig | 20 ++++++++++++++++++++
arch/arm/include/asm/paravirt.h | 19 +++++++++++++++++++
arch/arm/kernel/Makefile | 1 +
arch/arm/kernel/paravirt.c | 25 +++++++++++++++++++++++++
4 files changed, 65 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/paravirt.h
create mode 100644 arch/arm/kernel/paravirt.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 49d993c..5621a02 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1786,12 +1786,32 @@ config XEN_DOM0
def_bool y
depends on XEN
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ ---help---
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.
+
+config PARAVIRT_TIME_ACCOUNTING
+ bool "Paravirtual steal time accounting"
+ select PARAVIRT
+ default n
+ ---help---
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
config XEN
bool "Xen guest support on ARM (EXPERIMENTAL)"
depends on ARM && AEABI && OF
depends on CPU_V7 && !CPU_V6
depends on !GENERIC_ATOMIC64
select ARM_PSCI
+ select PARAVIRT
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.
diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
new file mode 100644
index 0000000..3b95bc6
--- /dev/null
+++ b/arch/arm/include/asm/paravirt.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_ARM_PARAVIRT_H
+#define _ASM_ARM_PARAVIRT_H
+
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+struct pv_time_ops {
+ unsigned long long (*steal_clock)(int cpu);
+};
+extern struct pv_time_ops pv_time_ops;
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return pv_time_ops.steal_clock(cpu);
+}
+
+
+#endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..d911db6 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
ifneq ($(CONFIG_ARCH_EBSA110),y)
obj-y += io.o
endif
+obj-$(CONFIG_PARAVIRT) += paravirt.o
head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
new file mode 100644
index 0000000..53f371e
--- /dev/null
+++ b/arch/arm/kernel/paravirt.c
@@ -0,0 +1,25 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2013 Citrix Systems
+ *
+ * Author: Stefano Stabellini <[email protected]>
+ */
+
+#include <linux/export.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <asm/paravirt.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+struct pv_time_ops pv_time_ops;
+EXPORT_SYMBOL_GPL(pv_time_ops);
--
1.7.2.5
Register the runstate_memory_area with the hypervisor.
Use pv_time_ops.steal_clock to account for stolen ticks.
Signed-off-by: Stefano Stabellini <[email protected]>
Changes in v4:
- don't use paravirt_steal_rq_enabled: we do not support retrieving
stolen ticks for vcpus other than one we are running on.
Changes in v3:
- use BUG_ON and smp_processor_id.
---
arch/arm/xen/enlighten.c | 21 +++++++++++++++++++++
1 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 13609e0..49839d8 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -14,7 +14,10 @@
#include <xen/xen-ops.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <asm/arch_timer.h>
#include <asm/system_misc.h>
+#include <asm/paravirt.h>
+#include <linux/jump_label.h>
#include <linux/interrupt.h>
#include <linux/irqreturn.h>
#include <linux/module.h>
@@ -152,6 +155,19 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+unsigned long long xen_stolen_accounting(int cpu)
+{
+ struct vcpu_runstate_info state;
+
+ BUG_ON(cpu != smp_processor_id());
+
+ xen_get_runstate_snapshot(&state);
+
+ WARN_ON(state.state != RUNSTATE_running);
+
+ return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
+}
+
static void __init xen_percpu_init(void *unused)
{
struct vcpu_register_vcpu_info info;
@@ -169,6 +185,8 @@ static void __init xen_percpu_init(void *unused)
BUG_ON(err);
per_cpu(xen_vcpu, cpu) = vcpup;
+ xen_setup_runstate_info(cpu);
+
enable_percpu_irq(xen_events_irq, 0);
}
@@ -300,6 +318,9 @@ static int __init xen_init_events(void)
on_each_cpu(xen_percpu_init, NULL, 0);
+ pv_time_ops.steal_clock = xen_stolen_accounting;
+ static_key_slow_inc(&paravirt_steal_enabled);
+
return 0;
}
postcore_initcall(xen_init_events);
--
1.7.2.5
On Tue, May 28, 2013 at 06:54:29PM +0100, Stefano Stabellini wrote:
> Signed-off-by: Stefano Stabellini <[email protected]>
> Acked-by: Ian Campbell <[email protected]>
> CC: [email protected]
>
> Changes in v2:
> - leave do_stolen_accounting in arch/x86/xen/time.c;
> - use the new common functions in arch/ia64/xen/time.c.
So you also modify the function.
> ---
> arch/ia64/xen/time.c | 48 +++----------------------
> arch/x86/xen/time.c | 76 +----------------------------------------
> drivers/xen/Makefile | 2 +-
> drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
> include/xen/xen-ops.h | 5 +++
> 5 files changed, 104 insertions(+), 118 deletions(-)
> create mode 100644 drivers/xen/time.c
>
> diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
> index 1f8244a..79a0b8c 100644
> --- a/arch/ia64/xen/time.c
> +++ b/arch/ia64/xen/time.c
> @@ -34,53 +34,17 @@
>
> #include "../kernel/fsyscall_gtod_data.h"
>
> -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
> static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
>
> /* taken from i386/kernel/time-xen.c */
> static void xen_init_missing_ticks_accounting(int cpu)
> {
> - struct vcpu_register_runstate_memory_area area;
> - struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
> - int rc;
> + xen_setup_runstate_info(&runstate);
>
> - memset(runstate, 0, sizeof(*runstate));
> -
> - area.addr.v = runstate;
> - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> - &area);
> - WARN_ON(rc && rc != -ENOSYS);
> -
> - per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
> - per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
> - + runstate->time[RUNSTATE_offline];
> -}
> -
> -/*
> - * Runstate accounting
> - */
> -/* stolen from arch/x86/xen/time.c */
> -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> -{
> - u64 state_time;
> - struct vcpu_runstate_info *state;
> -
> - BUG_ON(preemptible());
> -
> - state = &__get_cpu_var(xen_runstate);
> -
> - /*
> - * The runstate info is always updated by the hypervisor on
> - * the current CPU, so there's no need to use anything
> - * stronger than a compiler barrier when fetching it.
> - */
> - do {
> - state_time = state->state_entry_time;
> - rmb();
> - *res = *state;
> - rmb();
> - } while (state->state_entry_time != state_time);
> + per_cpu(xen_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
> + per_cpu(xen_stolen_time, cpu) = runstate.time[RUNSTATE_runnable]
> + + runstate.time[RUNSTATE_offline];
> }
>
> #define NS_PER_TICK (1000000000LL/HZ)
> @@ -94,7 +58,7 @@ consider_steal_time(unsigned long new_itm)
> struct vcpu_runstate_info runstate;
> struct task_struct *p = current;
>
> - get_runstate_snapshot(&runstate);
> + xen_get_runstate_snapshot(&runstate);
>
> /*
> * Check for vcpu migration effect
> @@ -202,7 +166,7 @@ static unsigned long long xen_sched_clock(void)
> */
> now = ia64_native_sched_clock();
>
> - get_runstate_snapshot(&runstate);
> + xen_get_runstate_snapshot(&runstate);
>
> WARN_ON(runstate.state != RUNSTATE_running);
>
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 3d88bfd..c0ca15e 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -30,9 +30,6 @@
> #define TIMER_SLOP 100000
> #define NS_PER_TICK (1000000000LL / HZ)
>
> -/* runstate info updated by Xen */
> -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> -
> /* snapshots of runstate info */
> static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
>
> @@ -40,77 +37,6 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> static DEFINE_PER_CPU(u64, xen_residual_stolen);
> static DEFINE_PER_CPU(u64, xen_residual_blocked);
>
> -/* return an consistent snapshot of 64-bit time/counter value */
> -static u64 get64(const u64 *p)
> -{
> - u64 ret;
> -
> - if (BITS_PER_LONG < 64) {
> - u32 *p32 = (u32 *)p;
> - u32 h, l;
> -
> - /*
> - * Read high then low, and then make sure high is
> - * still the same; this will only loop if low wraps
> - * and carries into high.
> - * XXX some clean way to make this endian-proof?
> - */
> - do {
> - h = p32[1];
> - barrier();
> - l = p32[0];
> - barrier();
> - } while (p32[1] != h);
> -
> - ret = (((u64)h) << 32) | l;
> - } else
> - ret = *p;
> -
> - return ret;
> -}
> -
> -/*
> - * Runstate accounting
> - */
> -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> -{
> - u64 state_time;
> - struct vcpu_runstate_info *state;
> -
> - BUG_ON(preemptible());
> -
> - state = &__get_cpu_var(xen_runstate);
> -
> - /*
> - * The runstate info is always updated by the hypervisor on
> - * the current CPU, so there's no need to use anything
> - * stronger than a compiler barrier when fetching it.
> - */
> - do {
> - state_time = get64(&state->state_entry_time);
> - barrier();
> - *res = *state;
> - barrier();
> - } while (get64(&state->state_entry_time) != state_time);
> -}
> -
> -/* return true when a vcpu could run but has no real cpu to run on */
> -bool xen_vcpu_stolen(int vcpu)
> -{
> - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> -}
> -
> -void xen_setup_runstate_info(int cpu)
> -{
> - struct vcpu_register_runstate_memory_area area;
> -
> - area.addr.v = &per_cpu(xen_runstate, cpu);
> -
> - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> - cpu, &area))
> - BUG();
> -}
> -
> static void do_stolen_accounting(void)
> {
> struct vcpu_runstate_info state;
> @@ -118,7 +44,7 @@ static void do_stolen_accounting(void)
> s64 blocked, runnable, offline, stolen;
> cputime_t ticks;
>
> - get_runstate_snapshot(&state);
> + xen_get_runstate_snapshot(&state);
>
> WARN_ON(state.state != RUNSTATE_running);
>
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index eabd0ee..2bf461a 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -3,7 +3,7 @@ obj-y += manage.o
> obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
> endif
> obj-$(CONFIG_X86) += fallback.o
> -obj-y += grant-table.o features.o events.o balloon.o
> +obj-y += grant-table.o features.o events.o balloon.o time.o
> obj-y += xenbus/
>
> nostackp := $(call cc-option, -fno-stack-protector)
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> new file mode 100644
> index 0000000..c2e39d3
> --- /dev/null
> +++ b/drivers/xen/time.c
> @@ -0,0 +1,91 @@
> +/*
> + * Xen stolen ticks accounting.
> + */
> +#include <linux/kernel.h>
> +#include <linux/kernel_stat.h>
> +#include <linux/math64.h>
> +#include <linux/gfp.h>
> +
> +#include <asm/xen/hypervisor.h>
> +#include <asm/xen/hypercall.h>
> +
> +#include <xen/events.h>
> +#include <xen/features.h>
> +#include <xen/interface/xen.h>
> +#include <xen/interface/vcpu.h>
> +#include <xen/xen-ops.h>
> +
> +/* runstate info updated by Xen */
> +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> +
> +/* return an consistent snapshot of 64-bit time/counter value */
> +static u64 get64(const u64 *p)
> +{
> + u64 ret;
> +
> + if (BITS_PER_LONG < 64) {
> + u32 *p32 = (u32 *)p;
> + u32 h, l;
> +
> + /*
> + * Read high then low, and then make sure high is
> + * still the same; this will only loop if low wraps
> + * and carries into high.
> + * XXX some clean way to make this endian-proof?
> + */
> + do {
> + h = p32[1];
> + barrier();
> + l = p32[0];
> + barrier();
> + } while (p32[1] != h);
> +
> + ret = (((u64)h) << 32) | l;
> + } else
> + ret = *p;
> +
> + return ret;
> +}
> +
> +/*
> + * Runstate accounting
> + */
> +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
> +{
> + u64 state_time;
> + struct vcpu_runstate_info *state;
> +
> + BUG_ON(preemptible());
> +
> + state = &__get_cpu_var(xen_runstate);
> +
> + /*
> + * The runstate info is always updated by the hypervisor on
> + * the current CPU, so there's no need to use anything
> + * stronger than a compiler barrier when fetching it.
> + */
> + do {
> + state_time = get64(&state->state_entry_time);
> + barrier();
> + *res = *state;
> + barrier();
> + } while (get64(&state->state_entry_time) != state_time);
> +}
> +
> +/* return true when a vcpu could run but has no real cpu to run on */
> +bool xen_vcpu_stolen(int vcpu)
> +{
> + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> +}
> +
> +void xen_setup_runstate_info(int cpu)
> +{
> + struct vcpu_register_runstate_memory_area area;
> +
> + area.addr.v = &per_cpu(xen_runstate, cpu);
> +
> + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> + cpu, &area))
> + BUG();
The original code did:
- rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
- &area);
- WARN_ON(rc && rc != -ENOSYS);
-
Any reason not to do this?
> +}
> +
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index d6fe062..4fd4e47 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -3,6 +3,7 @@
>
> #include <linux/percpu.h>
> #include <asm/xen/interface.h>
> +#include <xen/interface/vcpu.h>
>
> DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
>
> @@ -16,6 +17,10 @@ void xen_mm_unpin_all(void);
> void xen_timer_resume(void);
> void xen_arch_resume(void);
>
> +bool xen_vcpu_stolen(int vcpu);
> +void xen_setup_runstate_info(int cpu);
> +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
> +
> int xen_setup_shutdown_event(void);
>
> extern unsigned long *xen_contiguous_bitmap;
> --
> 1.7.2.5
>
On Tue, 28 May 2013, Konrad Rzeszutek Wilk wrote:
> On Tue, May 28, 2013 at 06:54:29PM +0100, Stefano Stabellini wrote:
> > Signed-off-by: Stefano Stabellini <[email protected]>
> > Acked-by: Ian Campbell <[email protected]>
> > CC: [email protected]
> >
> > Changes in v2:
> > - leave do_stolen_accounting in arch/x86/xen/time.c;
> > - use the new common functions in arch/ia64/xen/time.c.
>
> So you also modify the function.
do_stolen_accounting? Just enough to call the new common function
xen_get_runstate_snapshot instead of the old x86-specific
get_runstate_snapshot.
> > ---
> > arch/ia64/xen/time.c | 48 +++----------------------
> > arch/x86/xen/time.c | 76 +----------------------------------------
> > drivers/xen/Makefile | 2 +-
> > drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
> > include/xen/xen-ops.h | 5 +++
> > 5 files changed, 104 insertions(+), 118 deletions(-)
> > create mode 100644 drivers/xen/time.c
> >
> > diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
> > index 1f8244a..79a0b8c 100644
> > --- a/arch/ia64/xen/time.c
> > +++ b/arch/ia64/xen/time.c
> > @@ -34,53 +34,17 @@
> >
> > #include "../kernel/fsyscall_gtod_data.h"
> >
> > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
> > static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
> >
> > /* taken from i386/kernel/time-xen.c */
> > static void xen_init_missing_ticks_accounting(int cpu)
> > {
> > - struct vcpu_register_runstate_memory_area area;
> > - struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
> > - int rc;
> > + xen_setup_runstate_info(&runstate);
> >
> > - memset(runstate, 0, sizeof(*runstate));
> > -
> > - area.addr.v = runstate;
> > - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> > - &area);
> > - WARN_ON(rc && rc != -ENOSYS);
> > -
> > - per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
> > - per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
> > - + runstate->time[RUNSTATE_offline];
> > -}
> > -
> > -/*
> > - * Runstate accounting
> > - */
> > -/* stolen from arch/x86/xen/time.c */
> > -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> > -{
> > - u64 state_time;
> > - struct vcpu_runstate_info *state;
> > -
> > - BUG_ON(preemptible());
> > -
> > - state = &__get_cpu_var(xen_runstate);
> > -
> > - /*
> > - * The runstate info is always updated by the hypervisor on
> > - * the current CPU, so there's no need to use anything
> > - * stronger than a compiler barrier when fetching it.
> > - */
> > - do {
> > - state_time = state->state_entry_time;
> > - rmb();
> > - *res = *state;
> > - rmb();
> > - } while (state->state_entry_time != state_time);
> > + per_cpu(xen_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
> > + per_cpu(xen_stolen_time, cpu) = runstate.time[RUNSTATE_runnable]
> > + + runstate.time[RUNSTATE_offline];
> > }
> >
> > #define NS_PER_TICK (1000000000LL/HZ)
> > @@ -94,7 +58,7 @@ consider_steal_time(unsigned long new_itm)
> > struct vcpu_runstate_info runstate;
> > struct task_struct *p = current;
> >
> > - get_runstate_snapshot(&runstate);
> > + xen_get_runstate_snapshot(&runstate);
> >
> > /*
> > * Check for vcpu migration effect
> > @@ -202,7 +166,7 @@ static unsigned long long xen_sched_clock(void)
> > */
> > now = ia64_native_sched_clock();
> >
> > - get_runstate_snapshot(&runstate);
> > + xen_get_runstate_snapshot(&runstate);
> >
> > WARN_ON(runstate.state != RUNSTATE_running);
> >
> > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> > index 3d88bfd..c0ca15e 100644
> > --- a/arch/x86/xen/time.c
> > +++ b/arch/x86/xen/time.c
> > @@ -30,9 +30,6 @@
> > #define TIMER_SLOP 100000
> > #define NS_PER_TICK (1000000000LL / HZ)
> >
> > -/* runstate info updated by Xen */
> > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > -
> > /* snapshots of runstate info */
> > static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> >
> > @@ -40,77 +37,6 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> > static DEFINE_PER_CPU(u64, xen_residual_stolen);
> > static DEFINE_PER_CPU(u64, xen_residual_blocked);
> >
> > -/* return an consistent snapshot of 64-bit time/counter value */
> > -static u64 get64(const u64 *p)
> > -{
> > - u64 ret;
> > -
> > - if (BITS_PER_LONG < 64) {
> > - u32 *p32 = (u32 *)p;
> > - u32 h, l;
> > -
> > - /*
> > - * Read high then low, and then make sure high is
> > - * still the same; this will only loop if low wraps
> > - * and carries into high.
> > - * XXX some clean way to make this endian-proof?
> > - */
> > - do {
> > - h = p32[1];
> > - barrier();
> > - l = p32[0];
> > - barrier();
> > - } while (p32[1] != h);
> > -
> > - ret = (((u64)h) << 32) | l;
> > - } else
> > - ret = *p;
> > -
> > - return ret;
> > -}
> > -
> > -/*
> > - * Runstate accounting
> > - */
> > -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> > -{
> > - u64 state_time;
> > - struct vcpu_runstate_info *state;
> > -
> > - BUG_ON(preemptible());
> > -
> > - state = &__get_cpu_var(xen_runstate);
> > -
> > - /*
> > - * The runstate info is always updated by the hypervisor on
> > - * the current CPU, so there's no need to use anything
> > - * stronger than a compiler barrier when fetching it.
> > - */
> > - do {
> > - state_time = get64(&state->state_entry_time);
> > - barrier();
> > - *res = *state;
> > - barrier();
> > - } while (get64(&state->state_entry_time) != state_time);
> > -}
> > -
> > -/* return true when a vcpu could run but has no real cpu to run on */
> > -bool xen_vcpu_stolen(int vcpu)
> > -{
> > - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> > -}
> > -
> > -void xen_setup_runstate_info(int cpu)
> > -{
> > - struct vcpu_register_runstate_memory_area area;
> > -
> > - area.addr.v = &per_cpu(xen_runstate, cpu);
> > -
> > - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > - cpu, &area))
> > - BUG();
> > -}
> > -
> > static void do_stolen_accounting(void)
> > {
> > struct vcpu_runstate_info state;
> > @@ -118,7 +44,7 @@ static void do_stolen_accounting(void)
> > s64 blocked, runnable, offline, stolen;
> > cputime_t ticks;
> >
> > - get_runstate_snapshot(&state);
> > + xen_get_runstate_snapshot(&state);
> >
> > WARN_ON(state.state != RUNSTATE_running);
> >
> > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> > index eabd0ee..2bf461a 100644
> > --- a/drivers/xen/Makefile
> > +++ b/drivers/xen/Makefile
> > @@ -3,7 +3,7 @@ obj-y += manage.o
> > obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
> > endif
> > obj-$(CONFIG_X86) += fallback.o
> > -obj-y += grant-table.o features.o events.o balloon.o
> > +obj-y += grant-table.o features.o events.o balloon.o time.o
> > obj-y += xenbus/
> >
> > nostackp := $(call cc-option, -fno-stack-protector)
> > diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> > new file mode 100644
> > index 0000000..c2e39d3
> > --- /dev/null
> > +++ b/drivers/xen/time.c
> > @@ -0,0 +1,91 @@
> > +/*
> > + * Xen stolen ticks accounting.
> > + */
> > +#include <linux/kernel.h>
> > +#include <linux/kernel_stat.h>
> > +#include <linux/math64.h>
> > +#include <linux/gfp.h>
> > +
> > +#include <asm/xen/hypervisor.h>
> > +#include <asm/xen/hypercall.h>
> > +
> > +#include <xen/events.h>
> > +#include <xen/features.h>
> > +#include <xen/interface/xen.h>
> > +#include <xen/interface/vcpu.h>
> > +#include <xen/xen-ops.h>
> > +
> > +/* runstate info updated by Xen */
> > +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > +
> > +/* return an consistent snapshot of 64-bit time/counter value */
> > +static u64 get64(const u64 *p)
> > +{
> > + u64 ret;
> > +
> > + if (BITS_PER_LONG < 64) {
> > + u32 *p32 = (u32 *)p;
> > + u32 h, l;
> > +
> > + /*
> > + * Read high then low, and then make sure high is
> > + * still the same; this will only loop if low wraps
> > + * and carries into high.
> > + * XXX some clean way to make this endian-proof?
> > + */
> > + do {
> > + h = p32[1];
> > + barrier();
> > + l = p32[0];
> > + barrier();
> > + } while (p32[1] != h);
> > +
> > + ret = (((u64)h) << 32) | l;
> > + } else
> > + ret = *p;
> > +
> > + return ret;
> > +}
> > +
> > +/*
> > + * Runstate accounting
> > + */
> > +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
> > +{
> > + u64 state_time;
> > + struct vcpu_runstate_info *state;
> > +
> > + BUG_ON(preemptible());
> > +
> > + state = &__get_cpu_var(xen_runstate);
> > +
> > + /*
> > + * The runstate info is always updated by the hypervisor on
> > + * the current CPU, so there's no need to use anything
> > + * stronger than a compiler barrier when fetching it.
> > + */
> > + do {
> > + state_time = get64(&state->state_entry_time);
> > + barrier();
> > + *res = *state;
> > + barrier();
> > + } while (get64(&state->state_entry_time) != state_time);
> > +}
> > +
> > +/* return true when a vcpu could run but has no real cpu to run on */
> > +bool xen_vcpu_stolen(int vcpu)
> > +{
> > + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> > +}
> > +
> > +void xen_setup_runstate_info(int cpu)
> > +{
> > + struct vcpu_register_runstate_memory_area area;
> > +
> > + area.addr.v = &per_cpu(xen_runstate, cpu);
> > +
> > + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > + cpu, &area))
> > + BUG();
>
> The original code did:
>
> - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> - &area);
> - WARN_ON(rc && rc != -ENOSYS);
> -
> Any reason not to do this?
The original x86 code just BUGs out: I took that version over the ia64
version.
On Wed, May 29, 2013 at 12:03:26PM +0100, Stefano Stabellini wrote:
> On Tue, 28 May 2013, Konrad Rzeszutek Wilk wrote:
> > On Tue, May 28, 2013 at 06:54:29PM +0100, Stefano Stabellini wrote:
> > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > Acked-by: Ian Campbell <[email protected]>
> > > CC: [email protected]
> > >
> > > Changes in v2:
> > > - leave do_stolen_accounting in arch/x86/xen/time.c;
> > > - use the new common functions in arch/ia64/xen/time.c.
> >
> > So you also modify the function.
>
> do_stolen_accounting? Just enough to call the new common function
> xen_get_runstate_snapshot instead of the old x86 specific
> get_runstate_snapshot.
>
>
> > > ---
> > > arch/ia64/xen/time.c | 48 +++----------------------
> > > arch/x86/xen/time.c | 76 +----------------------------------------
> > > drivers/xen/Makefile | 2 +-
> > > drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
> > > include/xen/xen-ops.h | 5 +++
> > > 5 files changed, 104 insertions(+), 118 deletions(-)
> > > create mode 100644 drivers/xen/time.c
> > >
> > > diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
> > > index 1f8244a..79a0b8c 100644
> > > --- a/arch/ia64/xen/time.c
> > > +++ b/arch/ia64/xen/time.c
> > > @@ -34,53 +34,17 @@
> > >
> > > #include "../kernel/fsyscall_gtod_data.h"
> > >
> > > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > > static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
> > > static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
> > >
> > > /* taken from i386/kernel/time-xen.c */
> > > static void xen_init_missing_ticks_accounting(int cpu)
> > > {
> > > - struct vcpu_register_runstate_memory_area area;
> > > - struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
> > > - int rc;
> > > + xen_setup_runstate_info(&runstate);
> > >
> > > - memset(runstate, 0, sizeof(*runstate));
> > > -
> > > - area.addr.v = runstate;
> > > - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> > > - &area);
> > > - WARN_ON(rc && rc != -ENOSYS);
> > > -
> > > - per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
> > > - per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
> > > - + runstate->time[RUNSTATE_offline];
> > > -}
> > > -
> > > -/*
> > > - * Runstate accounting
> > > - */
> > > -/* stolen from arch/x86/xen/time.c */
> > > -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> > > -{
> > > - u64 state_time;
> > > - struct vcpu_runstate_info *state;
> > > -
> > > - BUG_ON(preemptible());
> > > -
> > > - state = &__get_cpu_var(xen_runstate);
> > > -
> > > - /*
> > > - * The runstate info is always updated by the hypervisor on
> > > - * the current CPU, so there's no need to use anything
> > > - * stronger than a compiler barrier when fetching it.
> > > - */
> > > - do {
> > > - state_time = state->state_entry_time;
> > > - rmb();
> > > - *res = *state;
> > > - rmb();
> > > - } while (state->state_entry_time != state_time);
> > > + per_cpu(xen_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
> > > + per_cpu(xen_stolen_time, cpu) = runstate.time[RUNSTATE_runnable]
> > > + + runstate.time[RUNSTATE_offline];
> > > }
> > >
> > > #define NS_PER_TICK (1000000000LL/HZ)
> > > @@ -94,7 +58,7 @@ consider_steal_time(unsigned long new_itm)
> > > struct vcpu_runstate_info runstate;
> > > struct task_struct *p = current;
> > >
> > > - get_runstate_snapshot(&runstate);
> > > + xen_get_runstate_snapshot(&runstate);
> > >
> > > /*
> > > * Check for vcpu migration effect
> > > @@ -202,7 +166,7 @@ static unsigned long long xen_sched_clock(void)
> > > */
> > > now = ia64_native_sched_clock();
> > >
> > > - get_runstate_snapshot(&runstate);
> > > + xen_get_runstate_snapshot(&runstate);
> > >
> > > WARN_ON(runstate.state != RUNSTATE_running);
> > >
> > > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> > > index 3d88bfd..c0ca15e 100644
> > > --- a/arch/x86/xen/time.c
> > > +++ b/arch/x86/xen/time.c
> > > @@ -30,9 +30,6 @@
> > > #define TIMER_SLOP 100000
> > > #define NS_PER_TICK (1000000000LL / HZ)
> > >
> > > -/* runstate info updated by Xen */
> > > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > > -
> > > /* snapshots of runstate info */
> > > static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> > >
> > > @@ -40,77 +37,6 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> > > static DEFINE_PER_CPU(u64, xen_residual_stolen);
> > > static DEFINE_PER_CPU(u64, xen_residual_blocked);
> > >
> > > -/* return an consistent snapshot of 64-bit time/counter value */
> > > -static u64 get64(const u64 *p)
> > > -{
> > > - u64 ret;
> > > -
> > > - if (BITS_PER_LONG < 64) {
> > > - u32 *p32 = (u32 *)p;
> > > - u32 h, l;
> > > -
> > > - /*
> > > - * Read high then low, and then make sure high is
> > > - * still the same; this will only loop if low wraps
> > > - * and carries into high.
> > > - * XXX some clean way to make this endian-proof?
> > > - */
> > > - do {
> > > - h = p32[1];
> > > - barrier();
> > > - l = p32[0];
> > > - barrier();
> > > - } while (p32[1] != h);
> > > -
> > > - ret = (((u64)h) << 32) | l;
> > > - } else
> > > - ret = *p;
> > > -
> > > - return ret;
> > > -}
> > > -
> > > -/*
> > > - * Runstate accounting
> > > - */
> > > -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> > > -{
> > > - u64 state_time;
> > > - struct vcpu_runstate_info *state;
> > > -
> > > - BUG_ON(preemptible());
> > > -
> > > - state = &__get_cpu_var(xen_runstate);
> > > -
> > > - /*
> > > - * The runstate info is always updated by the hypervisor on
> > > - * the current CPU, so there's no need to use anything
> > > - * stronger than a compiler barrier when fetching it.
> > > - */
> > > - do {
> > > - state_time = get64(&state->state_entry_time);
> > > - barrier();
> > > - *res = *state;
> > > - barrier();
> > > - } while (get64(&state->state_entry_time) != state_time);
> > > -}
> > > -
> > > -/* return true when a vcpu could run but has no real cpu to run on */
> > > -bool xen_vcpu_stolen(int vcpu)
> > > -{
> > > - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> > > -}
> > > -
> > > -void xen_setup_runstate_info(int cpu)
> > > -{
> > > - struct vcpu_register_runstate_memory_area area;
> > > -
> > > - area.addr.v = &per_cpu(xen_runstate, cpu);
> > > -
> > > - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > > - cpu, &area))
> > > - BUG();
> > > -}
> > > -
> > > static void do_stolen_accounting(void)
> > > {
> > > struct vcpu_runstate_info state;
> > > @@ -118,7 +44,7 @@ static void do_stolen_accounting(void)
> > > s64 blocked, runnable, offline, stolen;
> > > cputime_t ticks;
> > >
> > > - get_runstate_snapshot(&state);
> > > + xen_get_runstate_snapshot(&state);
> > >
> > > WARN_ON(state.state != RUNSTATE_running);
> > >
> > > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> > > index eabd0ee..2bf461a 100644
> > > --- a/drivers/xen/Makefile
> > > +++ b/drivers/xen/Makefile
> > > @@ -3,7 +3,7 @@ obj-y += manage.o
> > > obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
> > > endif
> > > obj-$(CONFIG_X86) += fallback.o
> > > -obj-y += grant-table.o features.o events.o balloon.o
> > > +obj-y += grant-table.o features.o events.o balloon.o time.o
> > > obj-y += xenbus/
> > >
> > > nostackp := $(call cc-option, -fno-stack-protector)
> > > diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> > > new file mode 100644
> > > index 0000000..c2e39d3
> > > --- /dev/null
> > > +++ b/drivers/xen/time.c
> > > @@ -0,0 +1,91 @@
> > > +/*
> > > + * Xen stolen ticks accounting.
> > > + */
> > > +#include <linux/kernel.h>
> > > +#include <linux/kernel_stat.h>
> > > +#include <linux/math64.h>
> > > +#include <linux/gfp.h>
> > > +
> > > +#include <asm/xen/hypervisor.h>
> > > +#include <asm/xen/hypercall.h>
> > > +
> > > +#include <xen/events.h>
> > > +#include <xen/features.h>
> > > +#include <xen/interface/xen.h>
> > > +#include <xen/interface/vcpu.h>
> > > +#include <xen/xen-ops.h>
> > > +
> > > +/* runstate info updated by Xen */
> > > +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > > +
> > > +/* return an consistent snapshot of 64-bit time/counter value */
> > > +static u64 get64(const u64 *p)
> > > +{
> > > + u64 ret;
> > > +
> > > + if (BITS_PER_LONG < 64) {
> > > + u32 *p32 = (u32 *)p;
> > > + u32 h, l;
> > > +
> > > + /*
> > > + * Read high then low, and then make sure high is
> > > + * still the same; this will only loop if low wraps
> > > + * and carries into high.
> > > + * XXX some clean way to make this endian-proof?
> > > + */
> > > + do {
> > > + h = p32[1];
> > > + barrier();
> > > + l = p32[0];
> > > + barrier();
> > > + } while (p32[1] != h);
> > > +
> > > + ret = (((u64)h) << 32) | l;
> > > + } else
> > > + ret = *p;
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +/*
> > > + * Runstate accounting
> > > + */
> > > +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
> > > +{
> > > + u64 state_time;
> > > + struct vcpu_runstate_info *state;
> > > +
> > > + BUG_ON(preemptible());
> > > +
> > > + state = &__get_cpu_var(xen_runstate);
> > > +
> > > + /*
> > > + * The runstate info is always updated by the hypervisor on
> > > + * the current CPU, so there's no need to use anything
> > > + * stronger than a compiler barrier when fetching it.
> > > + */
> > > + do {
> > > + state_time = get64(&state->state_entry_time);
> > > + barrier();
> > > + *res = *state;
> > > + barrier();
> > > + } while (get64(&state->state_entry_time) != state_time);
> > > +}
> > > +
> > > +/* return true when a vcpu could run but has no real cpu to run on */
> > > +bool xen_vcpu_stolen(int vcpu)
> > > +{
> > > + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> > > +}
> > > +
> > > +void xen_setup_runstate_info(int cpu)
> > > +{
> > > + struct vcpu_register_runstate_memory_area area;
> > > +
> > > + area.addr.v = &per_cpu(xen_runstate, cpu);
> > > +
> > > + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > > + cpu, &area))
> > > + BUG();
> >
> > The original code did:
> >
> > - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> > - &area);
> > - WARN_ON(rc && rc != -ENOSYS);
> > -
> > Any reason not to do this?
>
> The original x86 code just BUGs out: I took that version over the ia64
> version.
Ah, I see it now. OK, then this looks good to me.
On Wed, 29 May 2013, Konrad Rzeszutek Wilk wrote:
> On Wed, May 29, 2013 at 12:03:26PM +0100, Stefano Stabellini wrote:
> > On Tue, 28 May 2013, Konrad Rzeszutek Wilk wrote:
> > > On Tue, May 28, 2013 at 06:54:29PM +0100, Stefano Stabellini wrote:
> > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > Acked-by: Ian Campbell <[email protected]>
> > > > CC: [email protected]
> > > >
> > > > Changes in v2:
> > > > - leave do_stolen_accounting in arch/x86/xen/time.c;
> > > > - use the new common functions in arch/ia64/xen/time.c.
> > >
> > > So you also modify the function.
> >
> > do_stolen_accounting? Just enough to call the new common function
> > xen_get_runstate_snapshot instead of the old x86 specific
> > get_runstate_snapshot.
> >
> >
> > > > ---
> > > > arch/ia64/xen/time.c | 48 +++----------------------
> > > > arch/x86/xen/time.c | 76 +----------------------------------------
> > > > drivers/xen/Makefile | 2 +-
> > > > drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
> > > > include/xen/xen-ops.h | 5 +++
> > > > 5 files changed, 104 insertions(+), 118 deletions(-)
> > > > create mode 100644 drivers/xen/time.c
> > > >
> > > > diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
> > > > index 1f8244a..79a0b8c 100644
> > > > --- a/arch/ia64/xen/time.c
> > > > +++ b/arch/ia64/xen/time.c
> > > > @@ -34,53 +34,17 @@
> > > >
> > > > #include "../kernel/fsyscall_gtod_data.h"
> > > >
> > > > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > > > static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
> > > > static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
> > > >
> > > > /* taken from i386/kernel/time-xen.c */
> > > > static void xen_init_missing_ticks_accounting(int cpu)
> > > > {
> > > > - struct vcpu_register_runstate_memory_area area;
> > > > - struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
> > > > - int rc;
> > > > + xen_setup_runstate_info(&runstate);
> > > >
> > > > - memset(runstate, 0, sizeof(*runstate));
> > > > -
> > > > - area.addr.v = runstate;
> > > > - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> > > > - &area);
> > > > - WARN_ON(rc && rc != -ENOSYS);
> > > > -
> > > > - per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
> > > > - per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
> > > > - + runstate->time[RUNSTATE_offline];
> > > > -}
> > > > -
> > > > -/*
> > > > - * Runstate accounting
> > > > - */
> > > > -/* stolen from arch/x86/xen/time.c */
> > > > -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> > > > -{
> > > > - u64 state_time;
> > > > - struct vcpu_runstate_info *state;
> > > > -
> > > > - BUG_ON(preemptible());
> > > > -
> > > > - state = &__get_cpu_var(xen_runstate);
> > > > -
> > > > - /*
> > > > - * The runstate info is always updated by the hypervisor on
> > > > - * the current CPU, so there's no need to use anything
> > > > - * stronger than a compiler barrier when fetching it.
> > > > - */
> > > > - do {
> > > > - state_time = state->state_entry_time;
> > > > - rmb();
> > > > - *res = *state;
> > > > - rmb();
> > > > - } while (state->state_entry_time != state_time);
> > > > + per_cpu(xen_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
> > > > + per_cpu(xen_stolen_time, cpu) = runstate.time[RUNSTATE_runnable]
> > > > + + runstate.time[RUNSTATE_offline];
> > > > }
> > > >
> > > > #define NS_PER_TICK (1000000000LL/HZ)
> > > > @@ -94,7 +58,7 @@ consider_steal_time(unsigned long new_itm)
> > > > struct vcpu_runstate_info runstate;
> > > > struct task_struct *p = current;
> > > >
> > > > - get_runstate_snapshot(&runstate);
> > > > + xen_get_runstate_snapshot(&runstate);
> > > >
> > > > /*
> > > > * Check for vcpu migration effect
> > > > @@ -202,7 +166,7 @@ static unsigned long long xen_sched_clock(void)
> > > > */
> > > > now = ia64_native_sched_clock();
> > > >
> > > > - get_runstate_snapshot(&runstate);
> > > > + xen_get_runstate_snapshot(&runstate);
> > > >
> > > > WARN_ON(runstate.state != RUNSTATE_running);
> > > >
> > > > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> > > > index 3d88bfd..c0ca15e 100644
> > > > --- a/arch/x86/xen/time.c
> > > > +++ b/arch/x86/xen/time.c
> > > > @@ -30,9 +30,6 @@
> > > > #define TIMER_SLOP 100000
> > > > #define NS_PER_TICK (1000000000LL / HZ)
> > > >
> > > > -/* runstate info updated by Xen */
> > > > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > > > -
> > > > /* snapshots of runstate info */
> > > > static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> > > >
> > > > @@ -40,77 +37,6 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> > > > static DEFINE_PER_CPU(u64, xen_residual_stolen);
> > > > static DEFINE_PER_CPU(u64, xen_residual_blocked);
> > > >
> > > > -/* return an consistent snapshot of 64-bit time/counter value */
> > > > -static u64 get64(const u64 *p)
> > > > -{
> > > > - u64 ret;
> > > > -
> > > > - if (BITS_PER_LONG < 64) {
> > > > - u32 *p32 = (u32 *)p;
> > > > - u32 h, l;
> > > > -
> > > > - /*
> > > > - * Read high then low, and then make sure high is
> > > > - * still the same; this will only loop if low wraps
> > > > - * and carries into high.
> > > > - * XXX some clean way to make this endian-proof?
> > > > - */
> > > > - do {
> > > > - h = p32[1];
> > > > - barrier();
> > > > - l = p32[0];
> > > > - barrier();
> > > > - } while (p32[1] != h);
> > > > -
> > > > - ret = (((u64)h) << 32) | l;
> > > > - } else
> > > > - ret = *p;
> > > > -
> > > > - return ret;
> > > > -}
> > > > -
> > > > -/*
> > > > - * Runstate accounting
> > > > - */
> > > > -static void get_runstate_snapshot(struct vcpu_runstate_info *res)
> > > > -{
> > > > - u64 state_time;
> > > > - struct vcpu_runstate_info *state;
> > > > -
> > > > - BUG_ON(preemptible());
> > > > -
> > > > - state = &__get_cpu_var(xen_runstate);
> > > > -
> > > > - /*
> > > > - * The runstate info is always updated by the hypervisor on
> > > > - * the current CPU, so there's no need to use anything
> > > > - * stronger than a compiler barrier when fetching it.
> > > > - */
> > > > - do {
> > > > - state_time = get64(&state->state_entry_time);
> > > > - barrier();
> > > > - *res = *state;
> > > > - barrier();
> > > > - } while (get64(&state->state_entry_time) != state_time);
> > > > -}
> > > > -
> > > > -/* return true when a vcpu could run but has no real cpu to run on */
> > > > -bool xen_vcpu_stolen(int vcpu)
> > > > -{
> > > > - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> > > > -}
> > > > -
> > > > -void xen_setup_runstate_info(int cpu)
> > > > -{
> > > > - struct vcpu_register_runstate_memory_area area;
> > > > -
> > > > - area.addr.v = &per_cpu(xen_runstate, cpu);
> > > > -
> > > > - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > > > - cpu, &area))
> > > > - BUG();
> > > > -}
> > > > -
> > > > static void do_stolen_accounting(void)
> > > > {
> > > > struct vcpu_runstate_info state;
> > > > @@ -118,7 +44,7 @@ static void do_stolen_accounting(void)
> > > > s64 blocked, runnable, offline, stolen;
> > > > cputime_t ticks;
> > > >
> > > > - get_runstate_snapshot(&state);
> > > > + xen_get_runstate_snapshot(&state);
> > > >
> > > > WARN_ON(state.state != RUNSTATE_running);
> > > >
> > > > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> > > > index eabd0ee..2bf461a 100644
> > > > --- a/drivers/xen/Makefile
> > > > +++ b/drivers/xen/Makefile
> > > > @@ -3,7 +3,7 @@ obj-y += manage.o
> > > > obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
> > > > endif
> > > > obj-$(CONFIG_X86) += fallback.o
> > > > -obj-y += grant-table.o features.o events.o balloon.o
> > > > +obj-y += grant-table.o features.o events.o balloon.o time.o
> > > > obj-y += xenbus/
> > > >
> > > > nostackp := $(call cc-option, -fno-stack-protector)
> > > > diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> > > > new file mode 100644
> > > > index 0000000..c2e39d3
> > > > --- /dev/null
> > > > +++ b/drivers/xen/time.c
> > > > @@ -0,0 +1,91 @@
> > > > +/*
> > > > + * Xen stolen ticks accounting.
> > > > + */
> > > > +#include <linux/kernel.h>
> > > > +#include <linux/kernel_stat.h>
> > > > +#include <linux/math64.h>
> > > > +#include <linux/gfp.h>
> > > > +
> > > > +#include <asm/xen/hypervisor.h>
> > > > +#include <asm/xen/hypercall.h>
> > > > +
> > > > +#include <xen/events.h>
> > > > +#include <xen/features.h>
> > > > +#include <xen/interface/xen.h>
> > > > +#include <xen/interface/vcpu.h>
> > > > +#include <xen/xen-ops.h>
> > > > +
> > > > +/* runstate info updated by Xen */
> > > > +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
> > > > +
> > > > +/* return an consistent snapshot of 64-bit time/counter value */
> > > > +static u64 get64(const u64 *p)
> > > > +{
> > > > + u64 ret;
> > > > +
> > > > + if (BITS_PER_LONG < 64) {
> > > > + u32 *p32 = (u32 *)p;
> > > > + u32 h, l;
> > > > +
> > > > + /*
> > > > + * Read high then low, and then make sure high is
> > > > + * still the same; this will only loop if low wraps
> > > > + * and carries into high.
> > > > + * XXX some clean way to make this endian-proof?
> > > > + */
> > > > + do {
> > > > + h = p32[1];
> > > > + barrier();
> > > > + l = p32[0];
> > > > + barrier();
> > > > + } while (p32[1] != h);
> > > > +
> > > > + ret = (((u64)h) << 32) | l;
> > > > + } else
> > > > + ret = *p;
> > > > +
> > > > + return ret;
> > > > +}
> > > > +
> > > > +/*
> > > > + * Runstate accounting
> > > > + */
> > > > +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
> > > > +{
> > > > + u64 state_time;
> > > > + struct vcpu_runstate_info *state;
> > > > +
> > > > + BUG_ON(preemptible());
> > > > +
> > > > + state = &__get_cpu_var(xen_runstate);
> > > > +
> > > > + /*
> > > > + * The runstate info is always updated by the hypervisor on
> > > > + * the current CPU, so there's no need to use anything
> > > > + * stronger than a compiler barrier when fetching it.
> > > > + */
> > > > + do {
> > > > + state_time = get64(&state->state_entry_time);
> > > > + barrier();
> > > > + *res = *state;
> > > > + barrier();
> > > > + } while (get64(&state->state_entry_time) != state_time);
> > > > +}
> > > > +
> > > > +/* return true when a vcpu could run but has no real cpu to run on */
> > > > +bool xen_vcpu_stolen(int vcpu)
> > > > +{
> > > > + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> > > > +}
> > > > +
> > > > +void xen_setup_runstate_info(int cpu)
> > > > +{
> > > > + struct vcpu_register_runstate_memory_area area;
> > > > +
> > > > + area.addr.v = &per_cpu(xen_runstate, cpu);
> > > > +
> > > > + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > > > + cpu, &area))
> > > > + BUG();
> > >
> > > The original code did:
> > >
> > > - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> > > - &area);
> > > - WARN_ON(rc && rc != -ENOSYS);
> > > -
> > > Any reason not to do this?
> >
> > The original x86 code just BUGs out: I took that version over the ia64
> > version.
>
> Ah, I see it now. OK, then this looks good to me.
>
Can I add your acked-by? ;-)
> > > > > +void xen_setup_runstate_info(int cpu)
> > > > > +{
> > > > > + struct vcpu_register_runstate_memory_area area;
> > > > > +
> > > > > + area.addr.v = &per_cpu(xen_runstate, cpu);
> > > > > +
> > > > > + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > > > > + cpu, &area))
> > > > > + BUG();
> > > >
> > > > The original code did:
> > > >
> > > > - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
> > > > - &area);
> > > > - WARN_ON(rc && rc != -ENOSYS);
> > > > -
> > > > Any reason not to do this?
> > >
> > > The original x86 code just BUGs out: I took that version over the ia64
> > > version.
> >
> > Ah, I see it now. OK, then this looks good to me.
> >
>
> Can I add your acked-by? ;-)
How about?
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Hi Stefano,
On 05/28/2013 01:54 PM, Stefano Stabellini wrote:
> Introduce CONFIG_PARAVIRT and PARAVIRT_TIME_ACCOUNTING on ARM.
>
> The only paravirt interface supported is pv_time_ops.steal_clock, so no
> runtime pvops patching needed.
>
> This allows us to make use of steal_account_process_tick for stolen
> ticks accounting.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
>
> Changes in v3:
> - improve commit description and Kconfig help text;
> - no need to initialize pv_time_ops;
> - add PARAVIRT_TIME_ACCOUNTING.
Looks good to me.
Acked-by: Christopher Covington <[email protected]>
--
Employee of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by the Linux Foundation.