Upcoming AMD uarch will support Bus Lock Detect (called Bus Lock Trap
in AMD docs). Add support for it in Linux. Bus Lock Detect is
enumerated by CPUID Fn0000_0007_ECX_x0 bit [24 / BUSLOCKTRAP].
It can be enabled through MSR_IA32_DEBUGCTLMSR. When enabled, hardware
clears DR6[11] and raises a #DB exception on occurrence of a bus lock
if CPL > 0. More details about the feature can be found in the AMD APM [1].
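For quick testing, a bus lock can be provoked from user space with a locked
RMW on an operand that straddles a cache line. A minimal sketch (with Bus
Lock Detect active in the default warn mode, this is reported in dmesg as a
"took a bus_lock trap" message):

	#include <stdio.h>

	/* 128-byte buffer so a 4-byte locked access can straddle a
	 * 64-byte cache line boundary. */
	static _Alignas(64) volatile char buf[128];

	int main(void)
	{
		volatile int *p = (volatile int *)(buf + 62);

		/* Locked RMW across the cache line split => bus lock. */
		__asm__ volatile("lock addl $1, %0" : "+m" (*p));

		printf("done\n");
		return 0;
	}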
Patches are prepared on top of tip/x86/cpu (e063b531d4e8).
Patch #3 depends on the SEV-ES LBRV fix:
https://lore.kernel.org/r/[email protected]
[1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
2023, Vol 2, 13.1.3.6 Bus Lock Trap
https://bugzilla.kernel.org/attachment.cgi?id=304653
Ravi Bangoria (3):
x86/split_lock: Move Split and Bus lock code to a dedicated file
x86/bus_lock: Add support for AMD
KVM: SVM: Add Bus Lock Detect support
arch/x86/include/asm/cpu.h | 4 +
arch/x86/kernel/cpu/Makefile | 1 +
arch/x86/kernel/cpu/amd.c | 2 +
arch/x86/kernel/cpu/intel.c | 407 ---------------------------
arch/x86/kernel/cpu/split-bus-lock.c | 406 ++++++++++++++++++++++++++
arch/x86/kvm/svm/nested.c | 3 +-
arch/x86/kvm/svm/svm.c | 16 +-
7 files changed, 430 insertions(+), 409 deletions(-)
create mode 100644 arch/x86/kernel/cpu/split-bus-lock.c
--
2.44.0
Upcoming AMD uarch will support Bus Lock Detect, which functionally works
identically to Intel's. Move the split_lock and bus_lock specific code from
intel.c to a dedicated file so that it can be compiled and supported on
non-Intel platforms.
Signed-off-by: Ravi Bangoria <[email protected]>
---
arch/x86/include/asm/cpu.h | 4 +
arch/x86/kernel/cpu/Makefile | 1 +
arch/x86/kernel/cpu/intel.c | 407 ---------------------------
arch/x86/kernel/cpu/split-bus-lock.c | 406 ++++++++++++++++++++++++++
4 files changed, 411 insertions(+), 407 deletions(-)
create mode 100644 arch/x86/kernel/cpu/split-bus-lock.c
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index aa30fd8cad7f..4b5c31dc8112 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -51,6 +51,10 @@ static inline u8 get_this_hybrid_cpu_type(void)
return 0;
}
#endif
+
+void split_lock_init(void);
+void bus_lock_init(void);
+
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index eb4dbcdf41f1..86a10472ad1d 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -27,6 +27,7 @@ obj-y += aperfmperf.o
obj-y += cpuid-deps.o
obj-y += umwait.o
obj-y += capflags.o powerflags.o
+obj-y += split-bus-lock.o
obj-$(CONFIG_X86_LOCAL_APIC) += topology.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3c3e7e5695ba..730d7a065b8a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,13 +7,9 @@
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
-#include <linux/semaphore.h>
#include <linux/thread_info.h>
#include <linux/init.h>
#include <linux/uaccess.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/cpuhotplug.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
@@ -23,9 +19,6 @@
#include <asm/microcode.h>
#include <asm/hwcap2.h>
#include <asm/elf.h>
-#include <asm/cpu_device_id.h>
-#include <asm/cmdline.h>
-#include <asm/traps.h>
#include <asm/resctrl.h>
#include <asm/numa.h>
#include <asm/thermal.h>
@@ -41,28 +34,6 @@
#include <asm/apic.h>
#endif
-enum split_lock_detect_state {
- sld_off = 0,
- sld_warn,
- sld_fatal,
- sld_ratelimit,
-};
-
-/*
- * Default to sld_off because most systems do not support split lock detection.
- * sld_state_setup() will switch this to sld_warn on systems that support
- * split lock/bus lock detect, unless there is a command line override.
- */
-static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
-static u64 msr_test_ctrl_cache __ro_after_init;
-
-/*
- * With a name like MSR_TEST_CTL it should go without saying, but don't touch
- * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
- * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
- */
-static bool cpu_model_supports_sld __ro_after_init;
-
/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
@@ -595,9 +566,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
-static void split_lock_init(void);
-static void bus_lock_init(void);
-
static void init_intel(struct cpuinfo_x86 *c)
{
early_init_intel(c);
@@ -954,381 +922,6 @@ static const struct cpu_dev intel_cpu_dev = {
cpu_dev_register(intel_cpu_dev);
-#undef pr_fmt
-#define pr_fmt(fmt) "x86/split lock detection: " fmt
-
-static const struct {
- const char *option;
- enum split_lock_detect_state state;
-} sld_options[] __initconst = {
- { "off", sld_off },
- { "warn", sld_warn },
- { "fatal", sld_fatal },
- { "ratelimit:", sld_ratelimit },
-};
-
-static struct ratelimit_state bld_ratelimit;
-
-static unsigned int sysctl_sld_mitigate = 1;
-static DEFINE_SEMAPHORE(buslock_sem, 1);
-
-#ifdef CONFIG_PROC_SYSCTL
-static struct ctl_table sld_sysctls[] = {
- {
- .procname = "split_lock_mitigate",
- .data = &sysctl_sld_mitigate,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_douintvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-};
-
-static int __init sld_mitigate_sysctl_init(void)
-{
- register_sysctl_init("kernel", sld_sysctls);
- return 0;
-}
-
-late_initcall(sld_mitigate_sysctl_init);
-#endif
-
-static inline bool match_option(const char *arg, int arglen, const char *opt)
-{
- int len = strlen(opt), ratelimit;
-
- if (strncmp(arg, opt, len))
- return false;
-
- /*
- * Min ratelimit is 1 bus lock/sec.
- * Max ratelimit is 1000 bus locks/sec.
- */
- if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
- ratelimit > 0 && ratelimit <= 1000) {
- ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
- ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
- return true;
- }
-
- return len == arglen;
-}
-
-static bool split_lock_verify_msr(bool on)
-{
- u64 ctrl, tmp;
-
- if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
- return false;
- if (on)
- ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
- else
- ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
- if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
- return false;
- rdmsrl(MSR_TEST_CTRL, tmp);
- return ctrl == tmp;
-}
-
-static void __init sld_state_setup(void)
-{
- enum split_lock_detect_state state = sld_warn;
- char arg[20];
- int i, ret;
-
- if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
- !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- return;
-
- ret = cmdline_find_option(boot_command_line, "split_lock_detect",
- arg, sizeof(arg));
- if (ret >= 0) {
- for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
- if (match_option(arg, ret, sld_options[i].option)) {
- state = sld_options[i].state;
- break;
- }
- }
- }
- sld_state = state;
-}
-
-static void __init __split_lock_setup(void)
-{
- if (!split_lock_verify_msr(false)) {
- pr_info("MSR access failed: Disabled\n");
- return;
- }
-
- rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
-
- if (!split_lock_verify_msr(true)) {
- pr_info("MSR access failed: Disabled\n");
- return;
- }
-
- /* Restore the MSR to its cached value. */
- wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
-
- setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
-}
-
-/*
- * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
- * is not implemented as one thread could undo the setting of the other
- * thread immediately after dropping the lock anyway.
- */
-static void sld_update_msr(bool on)
-{
- u64 test_ctrl_val = msr_test_ctrl_cache;
-
- if (on)
- test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
-
- wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
-}
-
-static void split_lock_init(void)
-{
- /*
- * #DB for bus lock handles ratelimit and #AC for split lock is
- * disabled.
- */
- if (sld_state == sld_ratelimit) {
- split_lock_verify_msr(false);
- return;
- }
-
- if (cpu_model_supports_sld)
- split_lock_verify_msr(sld_state != sld_off);
-}
-
-static void __split_lock_reenable_unlock(struct work_struct *work)
-{
- sld_update_msr(true);
- up(&buslock_sem);
-}
-
-static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
-
-static void __split_lock_reenable(struct work_struct *work)
-{
- sld_update_msr(true);
-}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
-
-/*
- * If a CPU goes offline with pending delayed work to re-enable split lock
- * detection then the delayed work will be executed on some other CPU. That
- * handles releasing the buslock_sem, but because it executes on a
- * different CPU probably won't re-enable split lock detection. This is a
- * problem on HT systems since the sibling CPU on the same core may then be
- * left running with split lock detection disabled.
- *
- * Unconditionally re-enable detection here.
- */
-static int splitlock_cpu_offline(unsigned int cpu)
-{
- sld_update_msr(true);
-
- return 0;
-}
-
-static void split_lock_warn(unsigned long ip)
-{
- struct delayed_work *work;
- int cpu;
-
- if (!current->reported_split_lock)
- pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
- current->comm, current->pid, ip);
- current->reported_split_lock = 1;
-
- if (sysctl_sld_mitigate) {
- /*
- * misery factor #1:
- * sleep 10ms before trying to execute split lock.
- */
- if (msleep_interruptible(10) > 0)
- return;
- /*
- * Misery factor #2:
- * only allow one buslocked disabled core at a time.
- */
- if (down_interruptible(&buslock_sem) == -EINTR)
- return;
- work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
- }
-
- cpu = get_cpu();
- schedule_delayed_work_on(cpu, work, 2);
-
- /* Disable split lock detection on this CPU to make progress */
- sld_update_msr(false);
- put_cpu();
-}
-
-bool handle_guest_split_lock(unsigned long ip)
-{
- if (sld_state == sld_warn) {
- split_lock_warn(ip);
- return true;
- }
-
- pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
- current->comm, current->pid,
- sld_state == sld_fatal ? "fatal" : "bogus", ip);
-
- current->thread.error_code = 0;
- current->thread.trap_nr = X86_TRAP_AC;
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
- return false;
-}
-EXPORT_SYMBOL_GPL(handle_guest_split_lock);
-
-static void bus_lock_init(void)
-{
- u64 val;
-
- if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- return;
-
- rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
-
- if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
- (sld_state == sld_warn || sld_state == sld_fatal)) ||
- sld_state == sld_off) {
- /*
- * Warn and fatal are handled by #AC for split lock if #AC for
- * split lock is supported.
- */
- val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
- } else {
- val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- }
-
- wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
-}
-
-bool handle_user_split_lock(struct pt_regs *regs, long error_code)
-{
- if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
- return false;
- split_lock_warn(regs->ip);
- return true;
-}
-
-void handle_bus_lock(struct pt_regs *regs)
-{
- switch (sld_state) {
- case sld_off:
- break;
- case sld_ratelimit:
- /* Enforce no more than bld_ratelimit bus locks/sec. */
- while (!__ratelimit(&bld_ratelimit))
- msleep(20);
- /* Warn on the bus lock. */
- fallthrough;
- case sld_warn:
- pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
- current->comm, current->pid, regs->ip);
- break;
- case sld_fatal:
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
- break;
- }
-}
-
-/*
- * CPU models that are known to have the per-core split-lock detection
- * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
- */
-static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
- {}
-};
-
-static void __init split_lock_setup(struct cpuinfo_x86 *c)
-{
- const struct x86_cpu_id *m;
- u64 ia32_core_caps;
-
- if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
-
- /* Check for CPUs that have support but do not enumerate it: */
- m = x86_match_cpu(split_lock_cpu_ids);
- if (m)
- goto supported;
-
- if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
- return;
-
- /*
- * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
- * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
- * it have split lock detection.
- */
- rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
- if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
- goto supported;
-
- /* CPU is not in the model list and does not have the MSR bit: */
- return;
-
-supported:
- cpu_model_supports_sld = true;
- __split_lock_setup();
-}
-
-static void sld_state_show(void)
-{
- if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
- !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
- return;
-
- switch (sld_state) {
- case sld_off:
- pr_info("disabled\n");
- break;
- case sld_warn:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
- pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
- if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
- pr_warn("No splitlock CPU offline handler\n");
- } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
- pr_info("#DB: warning on user-space bus_locks\n");
- }
- break;
- case sld_fatal:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
- pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
- } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
- pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
- boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
- " from non-WB" : "");
- }
- break;
- case sld_ratelimit:
- if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
- break;
- }
-}
-
-void __init sld_setup(struct cpuinfo_x86 *c)
-{
- split_lock_setup(c);
- sld_state_setup();
- sld_state_show();
-}
-
#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24
/**
diff --git a/arch/x86/kernel/cpu/split-bus-lock.c b/arch/x86/kernel/cpu/split-bus-lock.c
new file mode 100644
index 000000000000..6ba04dc8ea64
--- /dev/null
+++ b/arch/x86/kernel/cpu/split-bus-lock.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "x86/split lock detection: " fmt
+
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/cpuhotplug.h>
+#include <asm/cpu_device_id.h>
+#include <asm/cmdline.h>
+#include <asm/traps.h>
+#include <asm/cpu.h>
+
+enum split_lock_detect_state {
+ sld_off = 0,
+ sld_warn,
+ sld_fatal,
+ sld_ratelimit,
+};
+
+/*
+ * Default to sld_off because most systems do not support split lock detection.
+ * sld_state_setup() will switch this to sld_warn on systems that support
+ * split lock/bus lock detect, unless there is a command line override.
+ */
+static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
+static u64 msr_test_ctrl_cache __ro_after_init;
+
+/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
+static const struct {
+ const char *option;
+ enum split_lock_detect_state state;
+} sld_options[] __initconst = {
+ { "off", sld_off },
+ { "warn", sld_warn },
+ { "fatal", sld_fatal },
+ { "ratelimit:", sld_ratelimit },
+};
+
+static struct ratelimit_state bld_ratelimit;
+
+static unsigned int sysctl_sld_mitigate = 1;
+static DEFINE_SEMAPHORE(buslock_sem, 1);
+
+#ifdef CONFIG_PROC_SYSCTL
+static struct ctl_table sld_sysctls[] = {
+ {
+ .procname = "split_lock_mitigate",
+ .data = &sysctl_sld_mitigate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+};
+
+static int __init sld_mitigate_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sld_sysctls);
+ return 0;
+}
+
+late_initcall(sld_mitigate_sysctl_init);
+#endif
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt), ratelimit;
+
+ if (strncmp(arg, opt, len))
+ return false;
+
+ /*
+ * Min ratelimit is 1 bus lock/sec.
+ * Max ratelimit is 1000 bus locks/sec.
+ */
+ if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
+ ratelimit > 0 && ratelimit <= 1000) {
+ ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
+ ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
+ return true;
+ }
+
+ return len == arglen;
+}
+
+static bool split_lock_verify_msr(bool on)
+{
+ u64 ctrl, tmp;
+
+ if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
+ return false;
+ if (on)
+ ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+ else
+ ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+ if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
+ return false;
+ rdmsrl(MSR_TEST_CTRL, tmp);
+ return ctrl == tmp;
+}
+
+static void __init sld_state_setup(void)
+{
+ enum split_lock_detect_state state = sld_warn;
+ char arg[20];
+ int i, ret;
+
+ if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ return;
+
+ ret = cmdline_find_option(boot_command_line, "split_lock_detect",
+ arg, sizeof(arg));
+ if (ret >= 0) {
+ for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
+ if (match_option(arg, ret, sld_options[i].option)) {
+ state = sld_options[i].state;
+ break;
+ }
+ }
+ }
+ sld_state = state;
+}
+
+static void __init __split_lock_setup(void)
+{
+ if (!split_lock_verify_msr(false)) {
+ pr_info("MSR access failed: Disabled\n");
+ return;
+ }
+
+ rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
+ if (!split_lock_verify_msr(true)) {
+ pr_info("MSR access failed: Disabled\n");
+ return;
+ }
+
+ /* Restore the MSR to its cached value. */
+ wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
+ setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
+}
+
+/*
+ * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
+ * is not implemented as one thread could undo the setting of the other
+ * thread immediately after dropping the lock anyway.
+ */
+static void sld_update_msr(bool on)
+{
+ u64 test_ctrl_val = msr_test_ctrl_cache;
+
+ if (on)
+ test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+
+ wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
+}
+
+void split_lock_init(void)
+{
+ /*
+ * #DB for bus lock handles ratelimit and #AC for split lock is
+ * disabled.
+ */
+ if (sld_state == sld_ratelimit) {
+ split_lock_verify_msr(false);
+ return;
+ }
+
+ if (cpu_model_supports_sld)
+ split_lock_verify_msr(sld_state != sld_off);
+}
+
+static void __split_lock_reenable_unlock(struct work_struct *work)
+{
+ sld_update_msr(true);
+ up(&buslock_sem);
+}
+
+static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
+
+static void __split_lock_reenable(struct work_struct *work)
+{
+ sld_update_msr(true);
+}
+static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+
+/*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+ * detection then the delayed work will be executed on some other CPU. That
+ * handles releasing the buslock_sem, but because it executes on a
+ * different CPU probably won't re-enable split lock detection. This is a
+ * problem on HT systems since the sibling CPU on the same core may then be
+ * left running with split lock detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+ sld_update_msr(true);
+
+ return 0;
+}
+
+static void split_lock_warn(unsigned long ip)
+{
+ struct delayed_work *work;
+ int cpu;
+
+ if (!current->reported_split_lock)
+ pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, ip);
+ current->reported_split_lock = 1;
+
+ if (sysctl_sld_mitigate) {
+ /*
+ * misery factor #1:
+ * sleep 10ms before trying to execute split lock.
+ */
+ if (msleep_interruptible(10) > 0)
+ return;
+ /*
+ * Misery factor #2:
+ * only allow one buslocked disabled core at a time.
+ */
+ if (down_interruptible(&buslock_sem) == -EINTR)
+ return;
+ work = &sl_reenable_unlock;
+ } else {
+ work = &sl_reenable;
+ }
+
+ cpu = get_cpu();
+ schedule_delayed_work_on(cpu, work, 2);
+
+ /* Disable split lock detection on this CPU to make progress */
+ sld_update_msr(false);
+ put_cpu();
+}
+
+bool handle_guest_split_lock(unsigned long ip)
+{
+ if (sld_state == sld_warn) {
+ split_lock_warn(ip);
+ return true;
+ }
+
+ pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid,
+ sld_state == sld_fatal ? "fatal" : "bogus", ip);
+
+ current->thread.error_code = 0;
+ current->thread.trap_nr = X86_TRAP_AC;
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+ return false;
+}
+EXPORT_SYMBOL_GPL(handle_guest_split_lock);
+
+void bus_lock_init(void)
+{
+ u64 val;
+
+ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ return;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
+
+ if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+ (sld_state == sld_warn || sld_state == sld_fatal)) ||
+ sld_state == sld_off) {
+ /*
+ * Warn and fatal are handled by #AC for split lock if #AC for
+ * split lock is supported.
+ */
+ val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+ } else {
+ val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+ }
+
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+}
+
+bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+{
+ if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
+ return false;
+ split_lock_warn(regs->ip);
+ return true;
+}
+
+void handle_bus_lock(struct pt_regs *regs)
+{
+ switch (sld_state) {
+ case sld_off:
+ break;
+ case sld_ratelimit:
+ /* Enforce no more than bld_ratelimit bus locks/sec. */
+ while (!__ratelimit(&bld_ratelimit))
+ msleep(20);
+ /* Warn on the bus lock. */
+ fallthrough;
+ case sld_warn:
+ pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, regs->ip);
+ break;
+ case sld_fatal:
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+ break;
+ }
+}
+
+/*
+ * CPU models that are known to have the per-core split-lock detection
+ * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
+ */
+static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
+ {}
+};
+
+static void __init split_lock_setup(struct cpuinfo_x86 *c)
+{
+ const struct x86_cpu_id *m;
+ u64 ia32_core_caps;
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return;
+
+ /* Check for CPUs that have support but do not enumerate it: */
+ m = x86_match_cpu(split_lock_cpu_ids);
+ if (m)
+ goto supported;
+
+ if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
+ return;
+
+ /*
+ * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
+ * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
+ * it have split lock detection.
+ */
+ rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
+ if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
+ goto supported;
+
+ /* CPU is not in the model list and does not have the MSR bit: */
+ return;
+
+supported:
+ cpu_model_supports_sld = true;
+ __split_lock_setup();
+}
+
+static void sld_state_show(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ return;
+
+ switch (sld_state) {
+ case sld_off:
+ pr_info("disabled\n");
+ break;
+ case sld_warn:
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+ pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+ pr_warn("No splitlock CPU offline handler\n");
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+ pr_info("#DB: warning on user-space bus_locks\n");
+ }
+ break;
+ case sld_fatal:
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+ pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+ pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
+ boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
+ " from non-WB" : "");
+ }
+ break;
+ case sld_ratelimit:
+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
+ break;
+ }
+}
+
+void __init sld_setup(struct cpuinfo_x86 *c)
+{
+ split_lock_setup(c);
+ sld_state_setup();
+ sld_state_show();
+}
--
2.44.0
Upcoming AMD uarch will support Bus Lock Detect (called Bus Lock Trap
in AMD docs). Add support for it in Linux. Bus Lock Detect is
enumerated by CPUID Fn0000_0007_ECX_x0 bit [24 / BUSLOCKTRAP].
It can be enabled through MSR_IA32_DEBUGCTLMSR. When enabled, hardware
clears DR6[11] and raises a #DB exception on occurrence of a bus lock
if CPL > 0. More details about the feature can be found in the AMD APM [1].
[1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
2023, Vol 2, 13.1.3.6 Bus Lock Trap
https://bugzilla.kernel.org/attachment.cgi?id=304653
Signed-off-by: Ravi Bangoria <[email protected]>
---
arch/x86/kernel/cpu/amd.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 39f316d50ae4..013d16479a24 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1058,6 +1058,8 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
+
+ bus_lock_init();
}
#ifdef CONFIG_X86_32
--
2.44.0
Upcoming AMD uarch will support Bus Lock Detect. Add support for it
in KVM. Bus Lock Detect is enabled through MSR_IA32_DEBUGCTLMSR, and
MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
enabled. Add this dependency in KVM.
Signed-off-by: Ravi Bangoria <[email protected]>
---
arch/x86/kvm/svm/nested.c | 3 ++-
arch/x86/kvm/svm/svm.c | 16 +++++++++++++++-
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 55b9a6d96bcf..6e93c2d9e7df 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -586,7 +586,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
/* These bits will be set properly on the first execution when new_vmc12 is true */
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
- svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
+ /* DR6_RTM is not supported on AMD as of now. */
+ svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_FIXED_1 | DR6_RTM;
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d1a9f9951635..60f3af9bdacb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1038,7 +1038,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
- bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+ u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
+ bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
@@ -3119,6 +3120,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
+ if ((data & DEBUGCTLMSR_BUS_LOCK_DETECT) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+ return 1;
+
svm_get_lbr_vmcb(svm)->save.dbgctl = data;
svm_update_lbrv(vcpu);
break;
@@ -5157,6 +5162,15 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x8000001F (SME/SEV features) */
sev_set_cpu_caps();
+
+ /*
+ * LBR Virtualization must be enabled to support BusLockTrap inside the
+ * guest, since BusLockTrap is enabled through MSR_IA32_DEBUGCTLMSR and
+ * MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
+ * enabled.
+ */
+ if (!lbrv)
+ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
}
static __init int svm_hardware_setup(void)
--
2.44.0
On Mon, Apr 29, 2024 at 11:36:41AM +0530, Ravi Bangoria wrote:
> Upcoming AMD uarch will support Bus Lock Detect, which functionally works
> identically to Intel's. Move the split_lock and bus_lock specific code from
> intel.c to a dedicated file so that it can be compiled and supported on
> non-Intel platforms.
>
> Signed-off-by: Ravi Bangoria <[email protected]>
> ---
> arch/x86/include/asm/cpu.h | 4 +
> arch/x86/kernel/cpu/Makefile | 1 +
> arch/x86/kernel/cpu/intel.c | 407 ---------------------------
> arch/x86/kernel/cpu/split-bus-lock.c | 406 ++++++++++++++++++++++++++
> 4 files changed, 411 insertions(+), 407 deletions(-)
> create mode 100644 arch/x86/kernel/cpu/split-bus-lock.c
>
> diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
> index aa30fd8cad7f..4b5c31dc8112 100644
> --- a/arch/x86/include/asm/cpu.h
> +++ b/arch/x86/include/asm/cpu.h
> @@ -51,6 +51,10 @@ static inline u8 get_this_hybrid_cpu_type(void)
> return 0;
> }
> #endif
> +
> +void split_lock_init(void);
> +void bus_lock_init(void);
> +
> #ifdef CONFIG_IA32_FEAT_CTL
> void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
> #else
> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
> index eb4dbcdf41f1..86a10472ad1d 100644
> --- a/arch/x86/kernel/cpu/Makefile
> +++ b/arch/x86/kernel/cpu/Makefile
> @@ -27,6 +27,7 @@ obj-y += aperfmperf.o
> obj-y += cpuid-deps.o
> obj-y += umwait.o
> obj-y += capflags.o powerflags.o
> +obj-y += split-bus-lock.o
Too fine-grained a name and those "-" should be "_".
Perhaps bus_lock.c only...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 4/29/24 01:06, Ravi Bangoria wrote:
> Upcoming AMD uarch will support Bus Lock Detect (called Bus Lock Trap
> in AMD docs). Add support for it in Linux. Bus Lock Detect is
> enumerated by CPUID Fn0000_0007_ECX_x0 bit [24 / BUSLOCKTRAP].
> It can be enabled through MSR_IA32_DEBUGCTLMSR. When enabled, hardware
> clears DR6[11] and raises a #DB exception on occurrence of a bus lock
> if CPL > 0. More details about the feature can be found in the AMD APM [1].
>
> [1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
> 2023, Vol 2, 13.1.3.6 Bus Lock Trap
> https://bugzilla.kernel.org/attachment.cgi?id=304653
>
> Signed-off-by: Ravi Bangoria <[email protected]>
> ---
> arch/x86/kernel/cpu/amd.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 39f316d50ae4..013d16479a24 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1058,6 +1058,8 @@ static void init_amd(struct cpuinfo_x86 *c)
>
> /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
> clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
> +
> + bus_lock_init();
Can this call and the one in intel.c be moved to common.c?
Thanks,
Tom
> }
>
> #ifdef CONFIG_X86_32
On Sun, Apr 28, 2024 at 11:08 PM Ravi Bangoria <[email protected]> wrote:
>
> Upcoming AMD uarch will support Bus Lock Detect (called Bus Lock Trap
> in AMD docs). Add support for it in Linux. Bus Lock Detect is
> enumerated by CPUID Fn0000_0007_ECX_x0 bit [24 / BUSLOCKTRAP].
> It can be enabled through MSR_IA32_DEBUGCTLMSR. When enabled, hardware
> clears DR6[11] and raises a #DB exception on occurrence of a bus lock
> if CPL > 0. More details about the feature can be found in the AMD APM [1].
>
> [1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
> 2023, Vol 2, 13.1.3.6 Bus Lock Trap
> https://bugzilla.kernel.org/attachment.cgi?id=304653
Is there any chance of getting something similar to Intel's "VMM
bus-lock detection," which causes a trap-style VM-exit on a bus lock
event?
On 06-May-24 9:54 PM, Jim Mattson wrote:
> On Sun, Apr 28, 2024 at 11:08 PM Ravi Bangoria <[email protected]> wrote:
>>
>> Upcoming AMD uarch will support Bus Lock Detect (called Bus Lock Trap
>> in AMD docs). Add support for it in Linux. Bus Lock Detect is
>> enumerated by CPUID Fn0000_0007_ECX_x0 bit [24 / BUSLOCKTRAP].
>> It can be enabled through MSR_IA32_DEBUGCTLMSR. When enabled, hardware
>> clears DR6[11] and raises a #DB exception on occurrence of a bus lock
>> if CPL > 0. More details about the feature can be found in the AMD APM [1].
>>
>> [1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
>> 2023, Vol 2, 13.1.3.6 Bus Lock Trap
>> https://bugzilla.kernel.org/attachment.cgi?id=304653
>
> Is there any chance of getting something similar to Intel's "VMM
> bus-lock detection," which causes a trap-style VM-exit on a bus lock
> event?
You are probably asking about "Bus Lock Threshold". Please see
"15.14.5 Bus Lock Threshold" in the same doc. FWIW, Bus Lock Threshold
is fault-style.
Thanks,
Ravi
>> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
>> index eb4dbcdf41f1..86a10472ad1d 100644
>> --- a/arch/x86/kernel/cpu/Makefile
>> +++ b/arch/x86/kernel/cpu/Makefile
>> @@ -27,6 +27,7 @@ obj-y += aperfmperf.o
>> obj-y += cpuid-deps.o
>> obj-y += umwait.o
>> obj-y += capflags.o powerflags.o
>> +obj-y += split-bus-lock.o
>
> Too fine-grained a name and those "-" should be "_".
>
> Perhaps bus_lock.c only...
Sure, will rename it to bus_lock.c
Thanks,
Ravi
>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index 39f316d50ae4..013d16479a24 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -1058,6 +1058,8 @@ static void init_amd(struct cpuinfo_x86 *c)
>> /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
>> clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
>> +
>> + bus_lock_init();
>
> Can this call and the one in the intel.c be moved to common.c?
Makes sense. Will do it.
Thanks,
Ravi
On Mon, Apr 29, 2024, Ravi Bangoria wrote:
> Upcoming AMD uarch will support Bus Lock Detect. Add support for it
> in KVM. Bus Lock Detect is enabled through MSR_IA32_DEBUGCTLMSR, and
> MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
> enabled. Add this dependency in KVM.
This is woefully incomplete, e.g. db_interception() needs to be updated to decipher
whether the #DB is the responsibility of the host or of the guest.
Honestly, I don't see any point in virtualizing this in KVM. As Jim alluded to,
what's far, far more interesting for KVM is "Bus Lock Threshold". Virtualizing
this for the guest would have been nice to have during the initial split-lock #AC
support, but now I'm skeptical the complexity is worth the payoff.
I suppose we could allow it if #DB isn't intercepted, at which point the enabling
required is minimal?
> Signed-off-by: Ravi Bangoria <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 3 ++-
> arch/x86/kvm/svm/svm.c | 16 +++++++++++++++-
> 2 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 55b9a6d96bcf..6e93c2d9e7df 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -586,7 +586,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
> /* These bits will be set properly on the first execution when new_vmc12 is true */
> if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
> vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
> - svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
> + /* DR6_RTM is not supported on AMD as of now. */
> + svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_FIXED_1 | DR6_RTM;
> vmcb_mark_dirty(vmcb02, VMCB_DR);
> }
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index d1a9f9951635..60f3af9bdacb 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1038,7 +1038,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
> - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
> + u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
> + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>
> @@ -3119,6 +3120,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> if (data & DEBUGCTL_RESERVED_BITS)
> return 1;
>
> + if ((data & DEBUGCTLMSR_BUS_LOCK_DETECT) &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
> + return 1;
> +
> svm_get_lbr_vmcb(svm)->save.dbgctl = data;
> svm_update_lbrv(vcpu);
> break;
> @@ -5157,6 +5162,15 @@ static __init void svm_set_cpu_caps(void)
>
> /* CPUID 0x8000001F (SME/SEV features) */
> sev_set_cpu_caps();
> +
> + /*
> + * LBR Virtualization must be enabled to support BusLockTrap inside the
> + * guest, since BusLockTrap is enabled through MSR_IA32_DEBUGCTLMSR and
> + * MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
> + * enabled.
> + */
> + if (!lbrv)
> + kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
> }
>
> static __init int svm_hardware_setup(void)
> --
> 2.44.0
>
Hi Sean,
On 6/4/2024 6:15 AM, Sean Christopherson wrote:
> On Mon, Apr 29, 2024, Ravi Bangoria wrote:
>> Upcoming AMD uarch will support Bus Lock Detect. Add support for it
>> in KVM. Bus Lock Detect is enabled through MSR_IA32_DEBUGCTLMSR, and
>> MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
>> enabled. Add this dependency in KVM.
>
> This is woefully incomplete, e.g. db_interception() needs to be updated to decipher
> whether the #DB is the responsibility of the host or of the guest.
Can you please elaborate? Are you referring to the vcpu->guest_debug thingy?
> Honestly, I don't see any point in virtualizing this in KVM. As Jim alluded to,
> what's far, far more interesting for KVM is "Bus Lock Threshold". Virtualizing
> this for the guest would have been nice to have during the initial split-lock #AC
> support, but now I'm skeptical the complexity is worth the payoff.
This has a valid use case of penalizing offending processes. I'm not sure
how much it's really used in production though.
> I suppose we could allow it if #DB isn't intercepted, at which point the enabling
> required is minimal?
The feature uses the DEBUG_CTL MSR, #DB, and the DR6 register. Do you mean
expose it when all three are accelerated, or just #DB?
Thanks for the feedback,
Ravi
On Wed, Jun 05, 2024, Ravi Bangoria wrote:
> Hi Sean,
>
> On 6/4/2024 6:15 AM, Sean Christopherson wrote:
> > On Mon, Apr 29, 2024, Ravi Bangoria wrote:
> >> Upcoming AMD uarch will support Bus Lock Detect. Add support for it
> >> in KVM. Bus Lock Detect is enabled through MSR_IA32_DEBUGCTLMSR, and
> >> MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
> >> enabled. Add this dependency in KVM.
> >
> > This is woefully incomplete, e.g. db_interception() needs to be updated to decipher
> > whether the #DB is the responsibility of the host or of the guest.
>
> Can you please elaborate? Are you referring to the vcpu->guest_debug thingy?
Yes. More broadly, all of db_interception().
> > Honestly, I don't see any point in virtualizing this in KVM. As Jim alluded to,
> > what's far, far more interesting for KVM is "Bus Lock Threshold". Virtualizing
> > this for the guest would have been nice to have during the initial split-lock #AC
> > support, but now I'm skeptical the complexity is worth the payoff.
>
> This has a valid use case of penalizing offending processes. I'm not sure
> how much it's really used in production though.
Yeah, but split-lock #AC and #DB have existed on Intel for years, and no one has
put in the effort to land KVM support, despite the series getting as far as v9[*].
Some of the problems on Intel were due to the awful FMS-based feature detection,
but those weren't the only hiccups. E.g. IIRC, we never sorted out what should
happen if both the host and guest want bus-lock #DBs.
Anyways, my point is that, except for SEV-ES+ where there's no good reason NOT to
virtualize Bus Lock Detect, I'm not convinced that it's worth virtualizing bus-lock
#DBs.
[*] https://lore.kernel.org/all/[email protected]
> > I suppose we could allow it if #DB isn't intercepted, at which point the enabling
> > required is minimal?
>
> The feature uses the DEBUG_CTL MSR, #DB, and the DR6 register. Do you mean
> expose it when all three are accelerated, or just #DB?
I mean that if KVM isn't intercepting #DB, then there's no extra complexity needed
to sort out whether the #DB "belongs" to the host or the guest. See commit
90cbf6d914ad ("KVM: SEV-ES: Eliminate #DB intercept when DebugSwap enabled").
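E.g. the SVM side could be as simple as the below (completely untested
sketch; guest_owns_debug_state() is a placeholder for whatever
DebugSwap-style condition ends up being appropriate):

	/*
	 * Modeled on commit 90cbf6d914ad: if the VMM isn't debugging the
	 * vCPU and the guest owns its debug state, drop the #DB intercept
	 * so that bus-lock #DBs are delivered directly to the guest.
	 */
	if (!vcpu->guest_debug && guest_owns_debug_state(vcpu))
		clr_exception_intercept(svm, DB_VECTOR);
	else
		set_exception_intercept(svm, DB_VECTOR);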
On 6/5/2024 8:38 PM, Sean Christopherson wrote:
> On Wed, Jun 05, 2024, Ravi Bangoria wrote:
>> Hi Sean,
>>
>> On 6/4/2024 6:15 AM, Sean Christopherson wrote:
>>> On Mon, Apr 29, 2024, Ravi Bangoria wrote:
>>>> Upcoming AMD uarch will support Bus Lock Detect. Add support for it
>>>> in KVM. Bus Lock Detect is enabled through MSR_IA32_DEBUGCTLMSR, and
>>>> MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
>>>> enabled. Add this dependency in KVM.
>>>
>>> This is woefully incomplete, e.g. db_interception() needs to be updated to decipher
>>> whether the #DB is the responsibility of the host or of the guest.
>>
>> Can you please elaborate? Are you referring to the vcpu->guest_debug thingy?
>
> Yes. More broadly, all of db_interception().
>
>>> Honestly, I don't see any point in virtualizing this in KVM. As Jim alluded to,
>>> what's far, far more interesting for KVM is "Bus Lock Threshold". Virtualizing
>>> this for the guest would have been nice to have during the initial split-lock #AC
>>> support, but now I'm skeptical the complexity is worth the payoff.
>>
>> This has a valid use case of penalizing offending processes. I'm not sure
>> how much it's really used in production though.
>
> Yeah, but split-lock #AC and #DB have existed on Intel for years, and no one has
> put in the effort to land KVM support, despite the series getting as far as v9[*].
Split-Lock Detect through #AC and Bus Lock Detect through #DB are independent
features. AMD supports only Bus Lock Detect with #DB. I'm not sure about Split
Lock Detect, but Intel already supports Bus Lock Detect in the guest. These are
the relevant commits:
https://git.kernel.org/torvalds/c/9a3ecd5e2aa10
https://git.kernel.org/torvalds/c/e8ea85fb280ec
https://git.kernel.org/torvalds/c/76ea438b4afcd
> Some of the problems on Intel were due to the awful FMS-based feature detection,
> but those weren't the only hiccups. E.g. IIRC, we never sorted out what should
> happen if both the host and guest want bus-lock #DBs.
I have to check the vcpu->guest_debug part, but keeping that aside, the host
and the guest can use Bus Lock Detect in parallel because the DEBUG_CTL MSR
and the DR6 register are saved/restored in the VMCB: hardware causes a
VMEXIT_EXCEPTION_1 for a guest #DB (when intercepted), and raises #DB on the
host when it's for the host.
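For the vcpu->guest_debug part, I'd expect db_interception() to keep roughly
its current shape, i.e. something like the below sketch (the XOR mirrors the
existing active-low DR6 payload handling):

	/*
	 * Host isn't debugging this vCPU: reflect the #DB, bus-lock bit
	 * included, into the guest. DR6 is active-low in hardware, so
	 * flip it to the positive-polarity exception payload format.
	 */
	if (!(vcpu->guest_debug &
	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
		kvm_queue_exception_p(vcpu, DB_VECTOR,
				      svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW);
		return 1;
	}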
Please correct me if I misunderstood your comment.
> Anyways, my point is that, except for SEV-ES+ where there's no good reason NOT to
> virtualize Bus Lock Detect, I'm not convinced that it's worth virtualizing bus-lock
> #DBs.
>
> [*] https://lore.kernel.org/all/[email protected]
>
>>> I suppose we could allow it if #DB isn't intercepted, at which point the enabling
>>> required is minimal?
>>
>> The feature uses the DEBUG_CTL MSR, #DB, and the DR6 register. Do you mean
>> expose it when all three are accelerated, or just #DB?
>
> I mean that if KVM isn't intercepting #DB, then there's no extra complexity needed
> to sort out whether the #DB "belongs" to the host or the guest. See commit
> 90cbf6d914ad ("KVM: SEV-ES: Eliminate #DB intercept when DebugSwap enabled").
On Wed, Jun 05, 2024, Ravi Bangoria wrote:
> On 6/5/2024 8:38 PM, Sean Christopherson wrote:
> > Some of the problems on Intel were due to the awful FMS-based feature detection,
> > but those weren't the only hiccups. E.g. IIRC, we never sorted out what should
> > happen if both the host and guest want bus-lock #DBs.
>
> I have to check the vcpu->guest_debug part, but keeping that aside, the host
> and the guest can use Bus Lock Detect in parallel because the DEBUG_CTL MSR
> and the DR6 register are saved/restored in the VMCB: hardware causes a
> VMEXIT_EXCEPTION_1 for a guest #DB (when intercepted), and raises #DB on the
> host when it's for the host.
I'm talking about the case where the host wants to do something in response to
bus locks that occurred in the guest. E.g. if the host is taking punitive action,
say by stalling the vCPU, then the guest kernel could bypass that behavior by
enabling bus lock detect itself.
Maybe it's a moot point in practice, since it sounds like Bus Lock Threshold will
be available at the same time.
Ugh, and if we wanted to let the host handle guest-induced #DBs, we'd need code
to keep Bus Lock Detect enabled in the guest since it resides in DEBUG_CTL. Bah.
So I guess if the vcpu->guest_debug part is fairly straightforward, it probably
makes sense to virtualize Bus Lock Detect, because the only reason not to
virtualize it would actually require more work/code in KVM.
I'd still love to see Bus Lock Threshold support sooner than later though :-)