If the warn mode is used with the mitigation mode disabled, then on each
CPU where a split lock occurs, detection is disabled in order to make
progress, and delayed work is scheduled to re-enable detection later.
However, all CPUs share one global delayed work structure. As a result,
if a split lock occurs on several CPUs at the same time (within 2 jiffies),
only one CPU will schedule the delayed work; the others will not, so
nothing re-enables detection on them. The return value of
schedule_delayed_work_on() would have shown this, but it is not checked
in the code.
A diagram that helps to understand how the bug is reproduced:
https://lore.kernel.org/all/[email protected]/
In order to fix the warn mode with the mitigation mode disabled, the
delayed work has to be per-CPU.
Fixes: 727209376f49 ("x86/split_lock: Add sysctl to control the misery mode")
Signed-off-by: Maksim Davydov <[email protected]>
---
arch/x86/kernel/cpu/intel.c | 65 ++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 26 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index be30d7fa2e66..527906aee688 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -971,28 +971,6 @@ static struct ratelimit_state bld_ratelimit;
static unsigned int sysctl_sld_mitigate = 1;
static DEFINE_SEMAPHORE(buslock_sem, 1);
-#ifdef CONFIG_PROC_SYSCTL
-static struct ctl_table sld_sysctls[] = {
- {
- .procname = "split_lock_mitigate",
- .data = &sysctl_sld_mitigate,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_douintvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-};
-
-static int __init sld_mitigate_sysctl_init(void)
-{
- register_sysctl_init("kernel", sld_sysctls);
- return 0;
-}
-
-late_initcall(sld_mitigate_sysctl_init);
-#endif
-
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt), ratelimit;
@@ -1115,7 +1093,43 @@ static void __split_lock_reenable(struct work_struct *work)
{
sld_update_msr(true);
}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+/*
+ * In order for each CPU to schedule itself delayed work independently of the
+ * others, delayed work struct should be per-CPU. This is not required when
+ * sysctl_sld_mitigate is enabled because of the semaphore, that limits
+ * the number of simultaneously scheduled delayed works to 1.
+ */
+static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
+
+#ifdef CONFIG_PROC_SYSCTL
+static struct ctl_table sld_sysctls[] = {
+ {
+ .procname = "split_lock_mitigate",
+ .data = &sysctl_sld_mitigate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+};
+
+static int __init sld_mitigate_sysctl_init(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu);
+ *work = (struct delayed_work) __DELAYED_WORK_INITIALIZER(*work,
+ __split_lock_reenable, 0);
+ }
+
+ register_sysctl_init("kernel", sld_sysctls);
+ return 0;
+}
+
+late_initcall(sld_mitigate_sysctl_init);
+#endif
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
@@ -1157,12 +1171,11 @@ static void split_lock_warn(unsigned long ip)
*/
if (down_interruptible(&buslock_sem) == -EINTR)
return;
- work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
}
cpu = get_cpu();
+ work = (sysctl_sld_mitigate ?
+ &sl_reenable_unlock : this_cpu_ptr(&sl_reenable));
schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */
--
2.34.1
Hi Maksim,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tip/x86/core]
[also build test WARNING on tip/master linus/master v6.9-rc6 next-20240502]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Maksim-Davydov/x86-split_lock-fix-delayed-detection-enabling/20240502-183110
base: tip/x86/core
patch link: https://lore.kernel.org/r/20240502102731.84429-1-davydov-max%40yandex-team.ru
patch subject: [PATCH v2] x86/split_lock: fix delayed detection enabling
config: i386-buildonly-randconfig-005-20240503 (https://download.01.org/0day-ci/archive/20240503/[email protected]/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240503/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All warnings (new ones prefixed by >>):
>> arch/x86/kernel/cpu/intel.c:1113:13: warning: '__split_lock_reenable' defined but not used [-Wunused-function]
1113 | static void __split_lock_reenable(struct work_struct *work)
| ^~~~~~~~~~~~~~~~~~~~~
vim +/__split_lock_reenable +1113 arch/x86/kernel/cpu/intel.c
727209376f4998 Guilherme G. Piccoli 2022-10-24 1112
727209376f4998 Guilherme G. Piccoli 2022-10-24 @1113 static void __split_lock_reenable(struct work_struct *work)
727209376f4998 Guilherme G. Piccoli 2022-10-24 1114 {
727209376f4998 Guilherme G. Piccoli 2022-10-24 1115 sld_update_msr(true);
727209376f4998 Guilherme G. Piccoli 2022-10-24 1116 }
73f2dbc05507dd Maksim Davydov 2024-05-02 1117 /*
73f2dbc05507dd Maksim Davydov 2024-05-02 1118 * In order for each CPU to schedule itself delayed work independently of the
73f2dbc05507dd Maksim Davydov 2024-05-02 1119 * others, delayed work struct should be per-CPU. This is not required when
73f2dbc05507dd Maksim Davydov 2024-05-02 1120 * sysctl_sld_mitigate is enabled because of the semaphore, that limits
73f2dbc05507dd Maksim Davydov 2024-05-02 1121 * the number of simultaneously scheduled delayed works to 1.
73f2dbc05507dd Maksim Davydov 2024-05-02 1122 */
73f2dbc05507dd Maksim Davydov 2024-05-02 1123 static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
73f2dbc05507dd Maksim Davydov 2024-05-02 1124
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki