Hi Zefan,
These four patches are bugfix for ACPI and power management from mainline
v3.6, comments are welcomed!
Thanks
Hanjun Guo
Colin Cross (1):
cpuidle: fix error handling in __cpuidle_register_device
Stephen Boyd (1):
cpufreq: Fix sysfs deadlock with concurrent hotplug/frequency switch
Thomas Renninger (2):
ACPI: Untangle a return statement for better readability
ACPI: Only count valid srat memory structures
arch/ia64/kernel/acpi.c | 5 +++--
arch/x86/mm/srat.c | 16 +++++++++-------
drivers/acpi/numa.c | 12 ++++++++----
drivers/cpufreq/cpufreq.c | 35 +++++++++++++++++++++++++++--------
drivers/cpuidle/cpuidle.c | 13 +++++++++----
include/linux/acpi.h | 2 +-
6 files changed, 57 insertions(+), 26 deletions(-)
From: Thomas Renninger <[email protected]>
mainline inclusion
from mainline-3.6
upstream commit 095adbb6441172985f5ddc3b9e88cb3191bdeac4
category: bugfix
If BIOS provide OS with SRAT table which contains only one disabled
memory affinity structure, this will trigger a WARN_ON.
This patch is based on the former one "ACPI: Untangle a return
statement for better readability".
--------------------------------------------------------
Otherwise you could run into:
WARN_ON in numa_register_memblks(), because node_possible_map is zero
References: https://bugzilla.novell.com/show_bug.cgi?id=757888
On this machine (ProLiant ML570 G3) the SRAT table contains:
- No processor affinities
- One memory affinity structure (which is set disabled)
CC: Per Jessen <[email protected]>
CC: Andi Kleen <[email protected]>
Signed-off-by: Thomas Renninger <[email protected]>
Signed-off-by: Len Brown <[email protected]>
Integrated-by: Hanjun Guo <[email protected]>
---
arch/ia64/kernel/acpi.c | 5 +++--
arch/x86/mm/srat.c | 16 +++++++++-------
drivers/acpi/numa.c | 8 +++++---
include/linux/acpi.h | 2 +-
4 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 6f38b61..4405788 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -497,7 +497,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
srat_num_cpus++;
}
-void __init
+int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
unsigned long paddr, size;
@@ -512,7 +512,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
/* Ignore disabled entries */
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
- return;
+ return -1;
/* record this node in proximity bitmap */
pxm_bit_set(pxm);
@@ -531,6 +531,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
p->size = size;
p->nid = pxm;
num_node_memblks++;
+ return 0;
}
void __init acpi_numa_arch_fixup(void)
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index efb5b4b..db870ec 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -142,23 +142,23 @@ static inline int save_add_info(void) {return 0;}
#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
+int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
u64 start, end;
int node, pxm;
if (srat_disabled())
- return;
+ return -1;
if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
bad_srat();
- return;
+ return -1;
}
if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- return;
+ return -1;
if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
- return;
+ return -1;
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
@@ -168,16 +168,18 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
bad_srat();
- return;
+ return -1;
}
if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
- return;
+ return -1;
}
printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
start, end);
+
+ return 0;
}
void __init acpi_numa_arch_fixup(void) {}
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 2a63993..cb31298 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -237,6 +237,8 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header,
return 0;
}
+static int __initdata parsed_numa_memblks;
+
static int __init
acpi_parse_memory_affinity(struct acpi_subtable_header * header,
const unsigned long end)
@@ -250,8 +252,8 @@ acpi_parse_memory_affinity(struct acpi_subtable_header * header,
acpi_table_print_srat_entry(header);
/* let architecture-dependent part to do it */
- acpi_numa_memory_affinity_init(memory_affinity);
-
+ if (!acpi_numa_memory_affinity_init(memory_affinity))
+ parsed_numa_memblks++;
return 0;
}
@@ -306,7 +308,7 @@ int __init acpi_numa_init(void)
if (cnt < 0)
return cnt;
- else if (cnt == 0)
+ else if (!parsed_numa_memblks)
return -ENOENT;
return 0;
}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index f421dd8..8b88441 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -96,7 +96,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
void acpi_numa_slit_init (struct acpi_table_slit *slit);
void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
-void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
+int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
void acpi_numa_arch_fixup(void);
#ifdef CONFIG_ACPI_HOTPLUG_CPU
--
1.6.0.2
From: Stephen Boyd <[email protected]>
mainline inclusion
from mainline-3.6
upstream commit a9144436271583115a2230db15d0b6ae2c481d3c
category: bugfix
As cpu offline/online and DVFS will be used in the actual
production environment, this patch is worth to be backported.
--------------------------------------------------------
Running one program that continuously hotplugs and replugs a cpu
concurrently with another program that continuously writes to the
scaling_setspeed node eventually deadlocks with:
=============================================
[ INFO: possible recursive locking detected ]
3.4.0 #37 Tainted: G W
---------------------------------------------
filemonkey/122 is trying to acquire lock:
(s_active#13){++++.+}, at: [<c01a3d28>] sysfs_remove_dir+0x9c/0xb4
but task is already holding lock:
(s_active#13){++++.+}, at: [<c01a22f0>] sysfs_write_file+0xe8/0x140
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(s_active#13);
lock(s_active#13);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by filemonkey/122:
#0: (&buffer->mutex){+.+.+.}, at: [<c01a2230>] sysfs_write_file+0x28/0x140
#1: (s_active#13){++++.+}, at: [<c01a22f0>] sysfs_write_file+0xe8/0x140
stack backtrace:
[<c0014fcc>] (unwind_backtrace+0x0/0x120) from [<c00ca600>] (validate_chain+0x6f8/0x1054)
[<c00ca600>] (validate_chain+0x6f8/0x1054) from [<c00cb778>] (__lock_acquire+0x81c/0x8d8)
[<c00cb778>] (__lock_acquire+0x81c/0x8d8) from [<c00cb9c0>] (lock_acquire+0x18c/0x1e8)
[<c00cb9c0>] (lock_acquire+0x18c/0x1e8) from [<c01a3ba8>] (sysfs_addrm_finish+0xd0/0x180)
[<c01a3ba8>] (sysfs_addrm_finish+0xd0/0x180) from [<c01a3d28>] (sysfs_remove_dir+0x9c/0xb4)
[<c01a3d28>] (sysfs_remove_dir+0x9c/0xb4) from [<c02d0e5c>] (kobject_del+0x10/0x38)
[<c02d0e5c>] (kobject_del+0x10/0x38) from [<c02d0f74>] (kobject_release+0xf0/0x194)
[<c02d0f74>] (kobject_release+0xf0/0x194) from [<c0565a98>] (cpufreq_cpu_put+0xc/0x24)
[<c0565a98>] (cpufreq_cpu_put+0xc/0x24) from [<c05683f0>] (store+0x6c/0x74)
[<c05683f0>] (store+0x6c/0x74) from [<c01a2314>] (sysfs_write_file+0x10c/0x140)
[<c01a2314>] (sysfs_write_file+0x10c/0x140) from [<c014af44>] (vfs_write+0xb0/0x128)
[<c014af44>] (vfs_write+0xb0/0x128) from [<c014b06c>] (sys_write+0x3c/0x68)
[<c014b06c>] (sys_write+0x3c/0x68) from [<c000e0e0>] (ret_fast_syscall+0x0/0x3c)
This is because store() in cpufreq.c indirectly calls
kobject_get() via cpufreq_cpu_get() and is the last one to call
kobject_put() via cpufreq_cpu_put(). Sysfs code should not call
kobject_get() or kobject_put() directly (see the comment around
sysfs_schedule_callback() for more information).
Fix this deadlock by introducing two new functions:
struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
which do the same thing as cpufreq_cpu_{get,put}() but don't call
kobject functions.
To easily trigger this deadlock you can insert an msleep() with a
reasonably large value right after the fail label at the bottom
of the store() function in cpufreq.c and then write
scaling_setspeed in one task and offline the cpu in another. The
first task will hang and be detected by the hung task detector.
Signed-off-by: Stephen Boyd <[email protected]>
Signed-off-by: Rafael J. Wysocki <[email protected]>
Integrated-by: Hanjun Guo <[email protected]>
---
drivers/cpufreq/cpufreq.c | 35 +++++++++++++++++++++++++++--------
1 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7f2f149..fb8a527 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -138,7 +138,7 @@ void disable_cpufreq(void)
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);
-struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
+static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
{
struct cpufreq_policy *data;
unsigned long flags;
@@ -162,7 +162,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
if (!data)
goto err_out_put_module;
- if (!kobject_get(&data->kobj))
+ if (!sysfs && !kobject_get(&data->kobj))
goto err_out_put_module;
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -175,16 +175,35 @@ err_out_unlock:
err_out:
return NULL;
}
+
+struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
+{
+ return __cpufreq_cpu_get(cpu, false);
+}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
+static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
+{
+ return __cpufreq_cpu_get(cpu, true);
+}
-void cpufreq_cpu_put(struct cpufreq_policy *data)
+static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs)
{
- kobject_put(&data->kobj);
+ if (!sysfs)
+ kobject_put(&data->kobj);
module_put(cpufreq_driver->owner);
}
+
+void cpufreq_cpu_put(struct cpufreq_policy *data)
+{
+ __cpufreq_cpu_put(data, false);
+}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
+static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
+{
+ __cpufreq_cpu_put(data, true);
+}
/*********************************************************************
* EXTERNALLY AFFECTING FREQUENCY CHANGES *
@@ -617,7 +636,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- policy = cpufreq_cpu_get(policy->cpu);
+ policy = cpufreq_cpu_get_sysfs(policy->cpu);
if (!policy)
goto no_policy;
@@ -631,7 +650,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
unlock_policy_rwsem_read(policy->cpu);
fail:
- cpufreq_cpu_put(policy);
+ cpufreq_cpu_put_sysfs(policy);
no_policy:
return ret;
}
@@ -642,7 +661,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- policy = cpufreq_cpu_get(policy->cpu);
+ policy = cpufreq_cpu_get_sysfs(policy->cpu);
if (!policy)
goto no_policy;
@@ -656,7 +675,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
unlock_policy_rwsem_write(policy->cpu);
fail:
- cpufreq_cpu_put(policy);
+ cpufreq_cpu_put_sysfs(policy);
no_policy:
return ret;
}
--
1.6.0.2
From: Colin Cross <[email protected]>
mainline inclusion
from mainline-3.6
upstream commit 3af272ab75c7a0c7fa5ae5507724d961f7e7718b
category: bugfix
--------------------------------------------------------
Fix the error handling in __cpuidle_register_device to include
the missing list_del. Move it to a label, which will simplify
the error handling when coupled states are added.
Reviewed-by: Santosh Shilimkar <[email protected]>
Tested-by: Santosh Shilimkar <[email protected]>
Reviewed-by: Kevin Hilman <[email protected]>
Tested-by: Kevin Hilman <[email protected]>
Signed-off-by: Colin Cross <[email protected]>
Reviewed-by: Rafael J. Wysocki <[email protected]>
Signed-off-by: Len Brown <[email protected]>
Integrated-by: Hanjun Guo <[email protected]>
---
drivers/cpuidle/cpuidle.c | 13 +++++++++----
1 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 2f0083a..7407a98 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -387,13 +387,18 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
per_cpu(cpuidle_devices, dev->cpu) = dev;
list_add(&dev->device_list, &cpuidle_detected_devices);
- if ((ret = cpuidle_add_sysfs(cpu_dev))) {
- module_put(cpuidle_driver->owner);
- return ret;
- }
+ ret = cpuidle_add_sysfs(cpu_dev);
+ if (ret)
+ goto err_sysfs;
dev->registered = 1;
return 0;
+
+err_sysfs:
+ list_del(&dev->device_list);
+ per_cpu(cpuidle_devices, dev->cpu) = NULL;
+ module_put(cpuidle_driver->owner);
+ return ret;
}
/**
--
1.6.0.2
From: Thomas Renninger <[email protected]>
mainline inclusion
from mainline-3.6
upstream commit f3946fb6e50b750d34f445188fa6746d14596afa
category: cleanup
This patch is cleanup patch, but the next bugfix patch is
upon it.
--------------------------------------------------------
No functional change.
Signed-off-by: Thomas Renninger <[email protected]>
Signed-off-by: Len Brown <[email protected]>
Integrated-by: Hanjun Guo <[email protected]>
---
drivers/acpi/numa.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index e56f3be..2a63993 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -304,8 +304,10 @@ int __init acpi_numa_init(void)
acpi_numa_arch_fixup();
- if (cnt <= 0)
- return cnt ?: -ENOENT;
+ if (cnt < 0)
+ return cnt;
+ else if (cnt == 0)
+ return -ENOENT;
return 0;
}
--
1.6.0.2
I made a mistake in sending email, sorry for the bothering, please ignore it.
On 2013/4/10 18:22, Hanjun Guo wrote:
> Hi Zefan,
>
> These four patches are bugfix for ACPI and power management from mainline
> v3.6, comments are welcomed!
>
> Thanks
> Hanjun Guo
>
>
> Colin Cross (1):
> cpuidle: fix error handling in __cpuidle_register_device
>
> Stephen Boyd (1):
> cpufreq: Fix sysfs deadlock with concurrent hotplug/frequency switch
>
> Thomas Renninger (2):
> ACPI: Untangle a return statement for better readability
> ACPI: Only count valid srat memory structures
>
> arch/ia64/kernel/acpi.c | 5 +++--
> arch/x86/mm/srat.c | 16 +++++++++-------
> drivers/acpi/numa.c | 12 ++++++++----
> drivers/cpufreq/cpufreq.c | 35 +++++++++++++++++++++++++++--------
> drivers/cpuidle/cpuidle.c | 13 +++++++++----
> include/linux/acpi.h | 2 +-
> 6 files changed, 57 insertions(+), 26 deletions(-)
>
>
> _______________________________________________
> Kernel.openeuler mailing list
> [email protected]
> http://rnd-openeuler.huawei.com/mailman/listinfo/kernel.openeuler
>
>