2015-11-20 15:01:35

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v12 0/4] xen/arm64: CONFIG_PARAVIRT and stolen ticks accounting

Hi all,

I dusted off this series from Jan 2014. Patch #2 and #3 still need an ack.


This patch series introduces stolen ticks accounting for Xen on ARM64.
Stolen ticks are clocksource ticks that have been "stolen" from the cpu,
typically because Linux is running in a virtual machine and the vcpu has
been descheduled. To account for these ticks we introduce
CONFIG_PARAVIRT and pv_time_ops so that we can make use of:

kernel/sched/cputime.c:steal_account_process_tick


Changes in v12:
- drop arm support
- #ifdef CONFIG_PARAVIRT the depends on it
- remove useless #include's

Changes in v11:
- add ifdef CONFIG_PARAVIRT to kernel/sched/cputime.c, because not all
architectures have an asm/paravirt.h header file to include
- drop the removal of ifdef CONFIG_PARAVIRT from kernel/sched/core.c for
the same reason


Stefano Stabellini (4):
xen: move xen_setup_runstate_info and get_runstate_snapshot to drivers/xen/time.c
missing include asm/paravirt.h in cputime.c
arm64: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops
xen/arm: account for stolen ticks

arch/arm/xen/enlighten.c | 24 ++++++++++
arch/arm64/Kconfig | 20 ++++++++
arch/arm64/include/asm/paravirt.h | 20 ++++++++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/paravirt.c | 25 ++++++++++
arch/x86/xen/time.c | 76 +------------------------------
drivers/xen/Makefile | 2 +-
drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++
include/xen/xen-ops.h | 5 ++
kernel/sched/cputime.c | 3 ++
10 files changed, 191 insertions(+), 76 deletions(-)
create mode 100644 arch/arm64/include/asm/paravirt.h
create mode 100644 arch/arm64/kernel/paravirt.c
create mode 100644 drivers/xen/time.c


Cheers,

Stefano


2015-11-20 15:01:10

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v12 1/4] xen: move xen_setup_runstate_info and get_runstate_snapshot to drivers/xen/time.c

Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Ian Campbell <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
CC: [email protected]

---

Changes in v10:
- rebase
---
arch/x86/xen/time.c | 76 +----------------------------------------
drivers/xen/Makefile | 2 +-
drivers/xen/time.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
include/xen/xen-ops.h | 5 +++
4 files changed, 98 insertions(+), 76 deletions(-)
create mode 100644 drivers/xen/time.c

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index f1ba6a0..041d4cd 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -32,86 +32,12 @@
#define TIMER_SLOP 100000
#define NS_PER_TICK (1000000000LL / HZ)

-/* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
-
/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);

/* unused ns of stolen time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);

-/* return an consistent snapshot of 64-bit time/counter value */
-static u64 get64(const u64 *p)
-{
- u64 ret;
-
- if (BITS_PER_LONG < 64) {
- u32 *p32 = (u32 *)p;
- u32 h, l;
-
- /*
- * Read high then low, and then make sure high is
- * still the same; this will only loop if low wraps
- * and carries into high.
- * XXX some clean way to make this endian-proof?
- */
- do {
- h = p32[1];
- barrier();
- l = p32[0];
- barrier();
- } while (p32[1] != h);
-
- ret = (((u64)h) << 32) | l;
- } else
- ret = *p;
-
- return ret;
-}
-
-/*
- * Runstate accounting
- */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
- u64 state_time;
- struct vcpu_runstate_info *state;
-
- BUG_ON(preemptible());
-
- state = this_cpu_ptr(&xen_runstate);
-
- /*
- * The runstate info is always updated by the hypervisor on
- * the current CPU, so there's no need to use anything
- * stronger than a compiler barrier when fetching it.
- */
- do {
- state_time = get64(&state->state_entry_time);
- barrier();
- *res = *state;
- barrier();
- } while (get64(&state->state_entry_time) != state_time);
-}
-
-/* return true when a vcpu could run but has no real cpu to run on */
-bool xen_vcpu_stolen(int vcpu)
-{
- return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
-}
-
-void xen_setup_runstate_info(int cpu)
-{
- struct vcpu_register_runstate_memory_area area;
-
- area.addr.v = &per_cpu(xen_runstate, cpu);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
- cpu, &area))
- BUG();
-}
-
static void do_stolen_accounting(void)
{
struct vcpu_runstate_info state;
@@ -119,7 +45,7 @@ static void do_stolen_accounting(void)
s64 runnable, offline, stolen;
cputime_t ticks;

- get_runstate_snapshot(&state);
+ xen_get_runstate_snapshot(&state);

WARN_ON(state.state != RUNSTATE_running);

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index aa8a7f7..9b7a35c 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o balloon.o manage.o preempt.o
+obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o
obj-y += events/
obj-y += xenbus/

diff --git a/drivers/xen/time.c b/drivers/xen/time.c
new file mode 100644
index 0000000..433fe24
--- /dev/null
+++ b/drivers/xen/time.c
@@ -0,0 +1,91 @@
+/*
+ * Xen stolen ticks accounting.
+ */
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/math64.h>
+#include <linux/gfp.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/events.h>
+#include <xen/features.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/xen-ops.h>
+
+/* runstate info updated by Xen */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
+
+/* return an consistent snapshot of 64-bit time/counter value */
+static u64 get64(const u64 *p)
+{
+ u64 ret;
+
+ if (BITS_PER_LONG < 64) {
+ u32 *p32 = (u32 *)p;
+ u32 h, l;
+
+ /*
+ * Read high then low, and then make sure high is
+ * still the same; this will only loop if low wraps
+ * and carries into high.
+ * XXX some clean way to make this endian-proof?
+ */
+ do {
+ h = p32[1];
+ barrier();
+ l = p32[0];
+ barrier();
+ } while (p32[1] != h);
+
+ ret = (((u64)h) << 32) | l;
+ } else
+ ret = *p;
+
+ return ret;
+}
+
+/*
+ * Runstate accounting
+ */
+void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+ u64 state_time;
+ struct vcpu_runstate_info *state;
+
+ BUG_ON(preemptible());
+
+ state = this_cpu_ptr(&xen_runstate);
+
+ /*
+ * The runstate info is always updated by the hypervisor on
+ * the current CPU, so there's no need to use anything
+ * stronger than a compiler barrier when fetching it.
+ */
+ do {
+ state_time = get64(&state->state_entry_time);
+ barrier();
+ *res = *state;
+ barrier();
+ } while (get64(&state->state_entry_time) != state_time);
+}
+
+/* return true when a vcpu could run but has no real cpu to run on */
+bool xen_vcpu_stolen(int vcpu)
+{
+ return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
+}
+
+void xen_setup_runstate_info(int cpu)
+{
+ struct vcpu_register_runstate_memory_area area;
+
+ area.addr.v = &per_cpu(xen_runstate, cpu);
+
+ if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
+ cpu, &area))
+ BUG();
+}
+
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index e4e214a..86abe07 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,6 +5,7 @@
#include <linux/notifier.h>
#include <linux/efi.h>
#include <asm/xen/interface.h>
+#include <xen/interface/vcpu.h>

DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);

@@ -18,6 +19,10 @@ void xen_arch_suspend(void);
void xen_resume_notifier_register(struct notifier_block *nb);
void xen_resume_notifier_unregister(struct notifier_block *nb);

+bool xen_vcpu_stolen(int vcpu);
+void xen_setup_runstate_info(int cpu);
+void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
+
int xen_setup_shutdown_event(void);

extern unsigned long *xen_contiguous_bitmap;
--
1.7.10.4

2015-11-20 14:59:54

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v12 2/4] missing include asm/paravirt.h in cputime.c

Add include asm/paravirt.h to cputime.c, as steal_account_process_tick
calls paravirt_steal_clock, which is defined in asm/paravirt.h.

The ifdef CONFIG_PARAVIRT is necessary because not all archs have an
asm/paravirt.h to include.

The reason why currently cputime.c compiles, even though include
<asm/paravirt.h> is missing, is that on x86 asm/paravirt.h is included
by one of the other headers included in kernel/sched/cputime.c:

#include <kernel/sched/sched.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <asm/preempt.h>
#include <linux/thread_info.h>
#include <asm/thread_info.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/paravirt.h>

On arm and arm64, where I am about to introduce asm/paravirt.h and
stolen time support, without #include <asm/paravirt.h> in cputime.c, I
would get an error.

Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
CC: [email protected]
CC: [email protected]

---

Changes in v12:
- add more info to the commit message

Changes in v11:
- add ifdef CONFIG_PARAVIRT to cputime.c, because not all architectures
have an asm/paravirt.h header file to include
- drop the removal of ifdef CONFIG_PARAVIRT from kernel/sched/core.c for
the same reason
---
kernel/sched/cputime.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8cbc3db..c7a27c4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -5,6 +5,9 @@
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#endif


#ifdef CONFIG_IRQ_TIME_ACCOUNTING
--
1.7.10.4

2015-11-20 15:00:45

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v12 3/4] arm64: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops

Introduce CONFIG_PARAVIRT and PARAVIRT_TIME_ACCOUNTING on ARM64.
Necessary duplication of paravirt.h and paravirt.c with ARM.

The only paravirt interface supported is pv_time_ops.steal_clock, so no
runtime pvops patching needed.

This allows us to make use of steal_account_process_tick for stolen
ticks accounting.

Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Marc Zyngier <[email protected]>
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]

---

Changes in v10:
- replace "---help---" with "help"

Changes in v7:
- ifdef CONFIG_PARAVIRT the content of paravirt.h.
---
arch/arm64/Kconfig | 20 ++++++++++++++++++++
arch/arm64/include/asm/paravirt.h | 20 ++++++++++++++++++++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/paravirt.c | 25 +++++++++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 arch/arm64/include/asm/paravirt.h
create mode 100644 arch/arm64/kernel/paravirt.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b10647..659e286 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -533,6 +533,25 @@ config SECCOMP
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.

+config PARAVIRT
+ bool "Enable paravirtualization code"
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.
+
+config PARAVIRT_TIME_ACCOUNTING
+ bool "Paravirtual steal time accounting"
+ select PARAVIRT
+ default n
+ help
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
config XEN_DOM0
def_bool y
depends on XEN
@@ -541,6 +560,7 @@ config XEN
bool "Xen guest support on ARM64"
depends on ARM64 && OF
select SWIOTLB_XEN
+ select PARAVIRT
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.

diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
new file mode 100644
index 0000000..fd5f428
--- /dev/null
+++ b/arch/arm64/include/asm/paravirt.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_ARM64_PARAVIRT_H
+#define _ASM_ARM64_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+struct pv_time_ops {
+ unsigned long long (*steal_clock)(int cpu);
+};
+extern struct pv_time_ops pv_time_ops;
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return pv_time_ops.steal_clock(cpu);
+}
+#endif
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f..ca9fbe1 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o

obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
new file mode 100644
index 0000000..53f371e
--- /dev/null
+++ b/arch/arm64/kernel/paravirt.c
@@ -0,0 +1,25 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2013 Citrix Systems
+ *
+ * Author: Stefano Stabellini <[email protected]>
+ */
+
+#include <linux/export.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <asm/paravirt.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+struct pv_time_ops pv_time_ops;
+EXPORT_SYMBOL_GPL(pv_time_ops);
--
1.7.10.4

2015-11-20 14:59:56

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v12 4/4] xen/arm: account for stolen ticks

Register the runstate_memory_area with the hypervisor.
Use pv_time_ops.steal_clock to account for stolen ticks.

Signed-off-by: Stefano Stabellini <[email protected]>

---

Changes in v12:
- ifdef CONFIG_PARAVIRT the code that depends on it
- remove useless #include's

Changes in v4:
- don't use paravirt_steal_rq_enabled: we do not support retrieving
stolen ticks for vcpus other than one we are running on.

Changes in v3:
- use BUG_ON and smp_processor_id.
---
arch/arm/xen/enlighten.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index fc7ea52..408211f 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -79,6 +79,23 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);

+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+
+static unsigned long long xen_stolen_accounting(int cpu)
+{
+ struct vcpu_runstate_info state;
+
+ BUG_ON(cpu != smp_processor_id());
+
+ xen_get_runstate_snapshot(&state);
+
+ WARN_ON(state.state != RUNSTATE_running);
+
+ return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
+}
+#endif
+
static void xen_percpu_init(void)
{
struct vcpu_register_vcpu_info info;
@@ -104,6 +121,8 @@ static void xen_percpu_init(void)
BUG_ON(err);
per_cpu(xen_vcpu, cpu) = vcpup;

+ xen_setup_runstate_info(cpu);
+
after_register_vcpu_info:
enable_percpu_irq(xen_events_irq, 0);
put_cpu();
@@ -271,6 +290,11 @@ static int __init xen_guest_init(void)

register_cpu_notifier(&xen_cpu_notifier);

+#ifdef CONFIG_PARAVIRT
+ pv_time_ops.steal_clock = xen_stolen_accounting;
+ static_key_slow_inc(&paravirt_steal_enabled);
+#endif
+
return 0;
}
early_initcall(xen_guest_init);
--
1.7.10.4