Historically, when running as a Xen PV guest, all MSR accesses have
silently swallowed any #GP faults, even when the kernel was not using
the *msr_safe() access functions.
Change that by making the behavior controllable via kernel config and
via a boot parameter.
This will help find code paths where MSRs which are not emulated by the
hypervisor are being accessed under Xen.
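As an illustrative usage (not part of the series itself): with the
series applied, faulting (unsafe) MSR accesses can be requested at boot
time, so that an access to a non-emulated MSR surfaces as a #GP instead
of being silently swallowed:

    xen_msr_safe=0

The compile-time default is selected via CONFIG_XEN_PV_MSR_SAFE (see
patch 4).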
Changes in V3:
- new patch 2
- addressed comments
Juergen Gross (4):
xen/pv: add fault recovery control to pmu msr accesses
xen/pv: fix vendor checks for pmu emulation
xen/pv: refactor msr access functions to support safe and unsafe
accesses
xen/pv: support selecting safe/unsafe msr accesses
.../admin-guide/kernel-parameters.txt | 6 ++
arch/x86/xen/Kconfig | 9 ++
arch/x86/xen/enlighten_pv.c | 99 +++++++++++++------
arch/x86/xen/pmu.c | 71 +++++++------
4 files changed, 127 insertions(+), 58 deletions(-)
--
2.35.3
Refactor and rename xen_read_msr_safe() and xen_write_msr_safe() to
support both kinds of MSR accesses: safe ones and potentially
#GP-generating ones.
This prepares for no longer silently swallowing #GPs in xen_read_msr()
and xen_write_msr().
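For illustration only (not part of the patch): after the refactoring,
the semantics are selected solely by whether an error pointer is
supplied, roughly:

    int err = 0;
    u64 val;

    /* Safe variant: a #GP is reported via err instead of faulting. */
    val = xen_do_read_msr(MSR_IA32_APICBASE, &err);

    /* Unsafe variant: err == NULL, a #GP is no longer swallowed. */
    val = xen_do_read_msr(MSR_IA32_APICBASE, NULL);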
Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
---
V2:
- init val in xen_do_read_msr() to 0 (Jan Beulich)
---
arch/x86/xen/enlighten_pv.c | 75 +++++++++++++++++++++++++++----------
1 file changed, 56 insertions(+), 19 deletions(-)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 9b1a58dda935..d5b0844a1b7c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -917,14 +917,18 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static u64 xen_do_read_msr(unsigned int msr, int *err)
{
- u64 val;
+ u64 val = 0; /* Avoid uninitialized value for safe variant. */
if (pmu_msr_read(msr, &val, err))
return val;
- val = native_read_msr_safe(msr, err);
+ if (err)
+ val = native_read_msr_safe(msr, err);
+ else
+ val = native_read_msr(msr);
+
switch (msr) {
case MSR_IA32_APICBASE:
val &= ~X2APIC_ENABLE;
@@ -933,23 +937,39 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
return val;
}
-static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+static void set_seg(unsigned int which, unsigned int low, unsigned int high,
+ int *err)
{
- int ret;
- unsigned int which;
- u64 base;
+ u64 base = ((u64)high << 32) | low;
+
+ if (HYPERVISOR_set_segment_base(which, base) == 0)
+ return;
- ret = 0;
+ if (err)
+ *err = -EIO;
+ else
+ WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
+}
+/*
+ * Support write_msr_safe() and write_msr() semantics.
+ * With err == NULL write_msr() semantics are selected.
+ * Supplying an err pointer requires err to be pre-initialized with 0.
+ */
+static void xen_do_write_msr(unsigned int msr, unsigned int low,
+ unsigned int high, int *err)
+{
switch (msr) {
- case MSR_FS_BASE: which = SEGBASE_FS; goto set;
- case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
- case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
-
- set:
- base = ((u64)high << 32) | low;
- if (HYPERVISOR_set_segment_base(which, base) != 0)
- ret = -EIO;
+ case MSR_FS_BASE:
+ set_seg(SEGBASE_FS, low, high, err);
+ break;
+
+ case MSR_KERNEL_GS_BASE:
+ set_seg(SEGBASE_GS_USER, low, high, err);
+ break;
+
+ case MSR_GS_BASE:
+ set_seg(SEGBASE_GS_KERNEL, low, high, err);
break;
case MSR_STAR:
@@ -965,11 +985,28 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
break;
default:
- if (!pmu_msr_write(msr, low, high, &ret))
- ret = native_write_msr_safe(msr, low, high);
+ if (!pmu_msr_write(msr, low, high, err)) {
+ if (err)
+ *err = native_write_msr_safe(msr, low, high);
+ else
+ native_write_msr(msr, low, high);
+ }
}
+}
+
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+ return xen_do_read_msr(msr, err);
+}
+
+static int xen_write_msr_safe(unsigned int msr, unsigned int low,
+ unsigned int high)
+{
+ int err = 0;
+
+ xen_do_write_msr(msr, low, high, &err);
- return ret;
+ return err;
}
static u64 xen_read_msr(unsigned int msr)
--
2.35.3
Instead of always using the safe variants for reading and writing MSRs
in Xen PV guests, make the behavior controllable via a Kconfig option
and a boot parameter.
The default will be the current behavior, which is to always use the
safe variant.
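As a configuration example (illustrative only): building with

    CONFIG_XEN_PV_MSR_SAFE=n

makes the faulting accesses the default, while booting such a kernel
with

    xen_msr_safe=1

switches an individual guest back to the always-safe behavior.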
Signed-off-by: Juergen Gross <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 6 +++++
arch/x86/xen/Kconfig | 9 +++++++
arch/x86/xen/enlighten_pv.c | 24 +++++++++++--------
3 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 426fa892d311..1bda9cf18fae 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6836,6 +6836,12 @@
Crash from Xen panic notifier, without executing late
panic() code such as dumping handler.
+ xen_msr_safe= [X86,XEN]
+ Format: <bool>
+ Select whether to always use non-faulting (safe) MSR
+ access functions when running as Xen PV guest. The
+ default value is controlled by CONFIG_XEN_PV_MSR_SAFE.
+
xen_nopvspin [X86,XEN]
Disables the qspinlock slowpath using Xen PV optimizations.
This parameter is obsoleted by "nopvspin" parameter, which
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 85246dd9faa1..9b1ec5d8c99c 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -92,3 +92,12 @@ config XEN_DOM0
select X86_X2APIC if XEN_PVH && X86_64
help
Support running as a Xen Dom0 guest.
+
+config XEN_PV_MSR_SAFE
+ bool "Always use safe MSR accesses in PV guests"
+ default y
+ depends on XEN_PV
+ help
+ Use safe (not faulting) MSR access functions even if the MSR access
+ should not fault anyway.
+ The default can be changed by using the "xen_msr_safe" boot parameter.
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d5b0844a1b7c..daae454191f2 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,6 +108,16 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
+
+static int __init parse_xen_msr_safe(char *str)
+{
+ if (str)
+ return strtobool(str, &xen_msr_safe);
+ return -EINVAL;
+}
+early_param("xen_msr_safe", parse_xen_msr_safe);
+
static void __init xen_pv_init_platform(void)
{
/* PV guests can't operate virtio devices without grants. */
@@ -1011,22 +1021,16 @@ static int xen_write_msr_safe(unsigned int msr, unsigned int low,
static u64 xen_read_msr(unsigned int msr)
{
- /*
- * This will silently swallow a #GP from RDMSR. It may be worth
- * changing that.
- */
int err;
- return xen_read_msr_safe(msr, &err);
+ return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
{
- /*
- * This will silently swallow a #GP from WRMSR. It may be worth
- * changing that.
- */
- xen_write_msr_safe(msr, low, high);
+ int err;
+
+ xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */
--
2.35.3