From: Xiang Chen <[email protected]>
If not up all the cpus with command line "maxcpus=x", system will be
blocked.
We find that some amba devices such as ETM devices, are associated with
special cpus, and if the cpu is not up, the register of associated device
is not allowed to access. BIOS reports all the ETM device nodes and a
amba device is created for every ETM device, so even if one cpu is not up,
the amba device will still be created for the associated device, and also
the register of device (pid and cid) will be accessed when adding amba
device which will cause the issue.
To fix it, skip creating amba device if it is associated with a cpu which
is not online.
Signed-off-by: Xiang Chen <[email protected]>
---
drivers/acpi/acpi_amba.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
index ab8a4e0191b1..2369198f734b 100644
--- a/drivers/acpi/acpi_amba.c
+++ b/drivers/acpi/acpi_amba.c
@@ -16,6 +16,7 @@
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <acpi/processor.h>
#include "internal.h"
@@ -45,6 +46,35 @@ static void amba_register_dummy_clk(void)
clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
}
+static int acpi_handle_to_cpuid(acpi_handle handle)
+{
+ int cpu = -1;
+ struct acpi_processor *pr;
+
+ for_each_possible_cpu(cpu) {
+ pr = per_cpu(processors, cpu);
+ if (pr && pr->handle == handle)
+ break;
+ }
+
+ return cpu;
+}
+
+static int acpi_dev_get_cpu(struct acpi_device *adev)
+{
+ acpi_handle cpu_handle;
+ acpi_status status;
+ int cpu;
+
+ status = acpi_get_parent(adev->handle, &cpu_handle);
+ if (ACPI_FAILURE(status))
+ return -1;
+ cpu = acpi_handle_to_cpuid(cpu_handle);
+ if (cpu >= nr_cpu_ids)
+ return -1;
+ return cpu;
+}
+
static int amba_handler_attach(struct acpi_device *adev,
const struct acpi_device_id *id)
{
@@ -54,11 +84,17 @@ static int amba_handler_attach(struct acpi_device *adev,
bool address_found = false;
int irq_no = 0;
int ret;
+ int cpu;
/* If the ACPI node already has a physical device attached, skip it. */
if (adev->physical_node_count)
return 0;
+ /* If the cpu associated with the device is not online, skip it. */
+ cpu = acpi_dev_get_cpu(adev);
+ if (cpu >= 0 && !cpu_online(cpu))
+ return 0;
+
dev = amba_device_alloc(dev_name(&adev->dev), 0, 0);
if (!dev) {
dev_err(&adev->dev, "%s(): amba_device_alloc() failed\n",
--
2.33.0
Hi Cheng,
I am severely behind in my patch review process and as such will not
be able to start reviewing your work before the week of February 7th.
Thanks,
Mathieu
On Fri, 7 Jan 2022 at 01:47, chenxiang <[email protected]> wrote:
>
> From: Xiang Chen <[email protected]>
>
> If not up all the cpus with command line "maxcpus=x", system will be
> blocked.
> We find that some amba devices such as ETM devices, are associated with
> special cpus, and if the cpu is not up, the register of associated device
> is not allowed to access. BIOS reports all the ETM device nodes and a
> amba device is created for every ETM device, so even if one cpu is not up,
> the amba device will still be created for the associated device, and also
> the register of device (pid and cid) will be accessed when adding amba
> device which will cause the issue.
> To fix it, skip creating amba device if it is associated with a cpu which
> is not online.
>
> Signed-off-by: Xiang Chen <[email protected]>
> ---
> drivers/acpi/acpi_amba.c | 36 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 36 insertions(+)
>
> diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
> index ab8a4e0191b1..2369198f734b 100644
> --- a/drivers/acpi/acpi_amba.c
> +++ b/drivers/acpi/acpi_amba.c
> @@ -16,6 +16,7 @@
> #include <linux/ioport.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> +#include <acpi/processor.h>
>
> #include "internal.h"
>
> @@ -45,6 +46,35 @@ static void amba_register_dummy_clk(void)
> clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
> }
>
> +static int acpi_handle_to_cpuid(acpi_handle handle)
> +{
> + int cpu = -1;
> + struct acpi_processor *pr;
> +
> + for_each_possible_cpu(cpu) {
> + pr = per_cpu(processors, cpu);
> + if (pr && pr->handle == handle)
> + break;
> + }
> +
> + return cpu;
> +}
> +
> +static int acpi_dev_get_cpu(struct acpi_device *adev)
> +{
> + acpi_handle cpu_handle;
> + acpi_status status;
> + int cpu;
> +
> + status = acpi_get_parent(adev->handle, &cpu_handle);
> + if (ACPI_FAILURE(status))
> + return -1;
> + cpu = acpi_handle_to_cpuid(cpu_handle);
> + if (cpu >= nr_cpu_ids)
> + return -1;
> + return cpu;
> +}
> +
> static int amba_handler_attach(struct acpi_device *adev,
> const struct acpi_device_id *id)
> {
> @@ -54,11 +84,17 @@ static int amba_handler_attach(struct acpi_device *adev,
> bool address_found = false;
> int irq_no = 0;
> int ret;
> + int cpu;
>
> /* If the ACPI node already has a physical device attached, skip it. */
> if (adev->physical_node_count)
> return 0;
>
> + /* If the cpu associated with the device is not online, skip it. */
> + cpu = acpi_dev_get_cpu(adev);
> + if (cpu >= 0 && !cpu_online(cpu))
> + return 0;
> +
> dev = amba_device_alloc(dev_name(&adev->dev), 0, 0);
> if (!dev) {
> dev_err(&adev->dev, "%s(): amba_device_alloc() failed\n",
> --
> 2.33.0
>
Hi Xiang
On 07/01/2022 08:41, chenxiang wrote:
> From: Xiang Chen <[email protected]>
>
> If not up all the cpus with command line "maxcpus=x", system will be
> blocked.
> We find that some amba devices such as ETM devices, are associated with
> special cpus, and if the cpu is not up, the register of associated device
> is not allowed to access. BIOS reports all the ETM device nodes and a
> amba device is created for every ETM device, so even if one cpu is not up,
> the amba device will still be created for the associated device, and also
> the register of device (pid and cid) will be accessed when adding amba
> device which will cause the issue.
> To fix it, skip creating amba device if it is associated with a cpu which
> is not online.
I understand the issue. We do not have an issue at least on DT based
platforms with a similar environment (Juno). The key is the power
management for the components.
There are two separate issues at play here :
1) Power management with ACPI. I believe there is a solution in progress
to address this.
2) The ETM is in the same power domain as that of the CPU and normal
device power management may not work without the CPU being online.
3) Additionally we have other issue of supporting system instructions
with ACPI, which do not appear on the AMBA bus.
Considering all of these, the ideal solution is to :
1) Implement power management for ACPI, which is anyway in progress
(at least for SCMI based systems)
2) Move the ETM driver away from the AMBA framework. That would make
the CPU online problem and the (3) above easier to solve.
Anshuman is going to look into this.
In the meantime, we could have this temporary fix in and solve it
forever by moving away from the AMBA.
>
> Signed-off-by: Xiang Chen <[email protected]>
> ---
> drivers/acpi/acpi_amba.c | 36 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 36 insertions(+)
>
> diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
> index ab8a4e0191b1..2369198f734b 100644
> --- a/drivers/acpi/acpi_amba.c
> +++ b/drivers/acpi/acpi_amba.c
> @@ -16,6 +16,7 @@
> #include <linux/ioport.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> +#include <acpi/processor.h>
>
> #include "internal.h"
>
> @@ -45,6 +46,35 @@ static void amba_register_dummy_clk(void)
> clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
> }
>
> +static int acpi_handle_to_cpuid(acpi_handle handle)
> +{
> + int cpu = -1;
> + struct acpi_processor *pr;
> +
> + for_each_possible_cpu(cpu) {
> + pr = per_cpu(processors, cpu);
> + if (pr && pr->handle == handle)
> + break;
> + }
> +
> + return cpu;
> +}
> +
Please could we reuse the function in coresight-platform.c ?
i.e, move it to a generic location and share it, rather than
duplicating it ?
> +static int acpi_dev_get_cpu(struct acpi_device *adev)
> +{
> + acpi_handle cpu_handle;
> + acpi_status status;
> + int cpu;
> +
> + status = acpi_get_parent(adev->handle, &cpu_handle);
> + if (ACPI_FAILURE(status))
> + return -1;
> + cpu = acpi_handle_to_cpuid(cpu_handle);
> + if (cpu >= nr_cpu_ids)
> + return -1;
> + return cpu;
> +}
> +
> static int amba_handler_attach(struct acpi_device *adev,
> const struct acpi_device_id *id)
> {
> @@ -54,11 +84,17 @@ static int amba_handler_attach(struct acpi_device *adev,
> bool address_found = false;
> int irq_no = 0;
> int ret;
> + int cpu;
>
> /* If the ACPI node already has a physical device attached, skip it. */
> if (adev->physical_node_count)
> return 0;
>
> + /* If the cpu associated with the device is not online, skip it. */
> + cpu = acpi_dev_get_cpu(adev);
> + if (cpu >= 0 && !cpu_online(cpu))
> + return 0;
> +
Except for the comment above, the patch looks good to me.
Suzuki
On 2022-01-28 11:00, Suzuki K Poulose wrote:
> Hi Xiang
>
> On 07/01/2022 08:41, chenxiang wrote:
>> From: Xiang Chen <[email protected]>
>>
>> If not up all the cpus with command line "maxcpus=x", system will be
>> blocked.
>> We find that some amba devices such as ETM devices, are associated with
>> special cpus, and if the cpu is not up, the register of associated device
>> is not allowed to access. BIOS reports all the ETM device nodes and a
>> amba device is created for every ETM device, so even if one cpu is not
>> up,
>> the amba device will still be created for the associated device, and also
>> the register of device (pid and cid) will be accessed when adding amba
>> device which will cause the issue.
>> To fix it, skip creating amba device if it is associated with a cpu which
>> is not online.
>
> I understand the issue. We do not have an issue at least on DT based
> platforms with a similar environment (Juno). The key is the power
> management for the components.
>
> There are two separate issues at play here :
>
> 1) Power management with ACPI. I believe there is a solution in progress
> to address this.
>
> 2) The ETM is in the same power domain as that of the CPU and normal
> device power management may not work without the CPU being online.
>
> 3) Additionally we have other issue of supporting system instructions
> with ACPI, which do not appear on the AMBA bus.
>
> Considering all of these, the ideal solution is to :
>
> 1) Implement power management for ACPI, which is anyway in progress
> (at least for SCMI based systems)
> 2) Move the ETM driver away from the AMBA framework. That would make
> the CPU online problem and the (3) above easier to solve.
> Anshuman is going to look into this.
>
> In the meantime, we could have this temporary fix in and solve it
> forever by moving away from the AMBA.
Out of curiosity, what happens when you boot with "maxcpus=" and then
manually bring up the extra cpus from userspace later? Is there a chance
that tracing will explode if some online CPUs have ETM initialised but
others don't?
Robin.
>>
>> Signed-off-by: Xiang Chen <[email protected]>
>> ---
>> drivers/acpi/acpi_amba.c | 36 ++++++++++++++++++++++++++++++++++++
>> 1 file changed, 36 insertions(+)
>>
>> diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
>> index ab8a4e0191b1..2369198f734b 100644
>> --- a/drivers/acpi/acpi_amba.c
>> +++ b/drivers/acpi/acpi_amba.c
>> @@ -16,6 +16,7 @@
>> #include <linux/ioport.h>
>> #include <linux/kernel.h>
>> #include <linux/module.h>
>> +#include <acpi/processor.h>
>> #include "internal.h"
>> @@ -45,6 +46,35 @@ static void amba_register_dummy_clk(void)
>> clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
>> }
>> +static int acpi_handle_to_cpuid(acpi_handle handle)
>> +{
>> + int cpu = -1;
>> + struct acpi_processor *pr;
>> +
>> + for_each_possible_cpu(cpu) {
>> + pr = per_cpu(processors, cpu);
>> + if (pr && pr->handle == handle)
>> + break;
>> + }
>> +
>> + return cpu;
>> +}
>> +
>
> Please could we reuse the function in coresight-platform.c ?
> i.e, move it to a generic location and share it, rather than
> duplicating it ?
>
>
>> +static int acpi_dev_get_cpu(struct acpi_device *adev)
>> +{
>> + acpi_handle cpu_handle;
>> + acpi_status status;
>> + int cpu;
>> +
>> + status = acpi_get_parent(adev->handle, &cpu_handle);
>> + if (ACPI_FAILURE(status))
>> + return -1;
>> + cpu = acpi_handle_to_cpuid(cpu_handle);
>> + if (cpu >= nr_cpu_ids)
>> + return -1;
>> + return cpu;
>> +}
>> +
>> static int amba_handler_attach(struct acpi_device *adev,
>> const struct acpi_device_id *id)
>> {
>> @@ -54,11 +84,17 @@ static int amba_handler_attach(struct acpi_device
>> *adev,
>> bool address_found = false;
>> int irq_no = 0;
>> int ret;
>> + int cpu;
>> /* If the ACPI node already has a physical device attached, skip
>> it. */
>> if (adev->physical_node_count)
>> return 0;
>> + /* If the cpu associated with the device is not online, skip it. */
>> + cpu = acpi_dev_get_cpu(adev);
>> + if (cpu >= 0 && !cpu_online(cpu))
>> + return 0;
>> +
>
> Except for the comment above, the patch looks good to me.
>
> Suzuki
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On 28/01/2022 15:28, Robin Murphy wrote:
> On 2022-01-28 11:00, Suzuki K Poulose wrote:
>> Hi Xiang
>>
>> On 07/01/2022 08:41, chenxiang wrote:
>>> From: Xiang Chen <[email protected]>
>>>
>>> If not up all the cpus with command line "maxcpus=x", system will be
>>> blocked.
>>> We find that some amba devices such as ETM devices, are associated with
>>> special cpus, and if the cpu is not up, the register of associated
>>> device
>>> is not allowed to access. BIOS reports all the ETM device nodes and a
>>> amba device is created for every ETM device, so even if one cpu is
>>> not up,
>>> the amba device will still be created for the associated device, and
>>> also
>>> the register of device (pid and cid) will be accessed when adding amba
>>> device which will cause the issue.
>>> To fix it, skip creating amba device if it is associated with a cpu
>>> which
>>> is not online.
>>
>> I understand the issue. We do not have an issue at least on DT based
>> platforms with a similar environment (Juno). The key is the power
>> management for the components.
>>
>> There are two separate issues at play here :
>>
>> 1) Power management with ACPI. I believe there is a solution in progress
>> to address this.
>>
>> 2) The ETM is in the same power domain as that of the CPU and normal
>> device power management may not work without the CPU being online.
>>
>> 3) Additionally we have other issue of supporting system instructions
>> with ACPI, which do not appear on the AMBA bus.
>>
>> Considering all of these, the ideal solution is to :
>>
>> 1) Implement power management for ACPI, which is anyway in progress
>> (at least for SCMI based systems)
>> 2) Move the ETM driver away from the AMBA framework. That would make
>> the CPU online problem and the (3) above easier to solve.
>> Anshuman is going to look into this.
>>
>> In the meantime, we could have this temporary fix in and solve it
>> forever by moving away from the AMBA.
>
> Out of curiosity, what happens when you boot with "maxcpus=" and then
> manually bring up the extra cpus from userspace later? Is there a chance
> that tracing will explode if some online CPUs have ETM initialised but
> others don't?
No. At the moment, we don't bring the ETMs online for hotplugged CPUs.
i.e., ETM is not registered for the CPU. So any attempt to trace on that
CPU will fail.
Probe deferral doesn't go beyond the userspace init. But we plan to add
hotplug support, by defering the ETM probe.
At present, the ETMs are bound to the AMBA frame work and I am not sure
if there is a way to explicitly trigger a probe from the AMBA layer on
CPU hotplug. The other option is to always create the AMBA device and
defer the "etm" probe to a cpu hotplug notifier in the ETM driver.
This is something we plan to add.
Suzuki
>
> Robin.
>
>>>
>>> Signed-off-by: Xiang Chen <[email protected]>
>>> ---
>>> drivers/acpi/acpi_amba.c | 36 ++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 36 insertions(+)
>>>
>>> diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
>>> index ab8a4e0191b1..2369198f734b 100644
>>> --- a/drivers/acpi/acpi_amba.c
>>> +++ b/drivers/acpi/acpi_amba.c
>>> @@ -16,6 +16,7 @@
>>> #include <linux/ioport.h>
>>> #include <linux/kernel.h>
>>> #include <linux/module.h>
>>> +#include <acpi/processor.h>
>>> #include "internal.h"
>>> @@ -45,6 +46,35 @@ static void amba_register_dummy_clk(void)
>>> clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
>>> }
>>> +static int acpi_handle_to_cpuid(acpi_handle handle)
>>> +{
>>> + int cpu = -1;
>>> + struct acpi_processor *pr;
>>> +
>>> + for_each_possible_cpu(cpu) {
>>> + pr = per_cpu(processors, cpu);
>>> + if (pr && pr->handle == handle)
>>> + break;
>>> + }
>>> +
>>> + return cpu;
>>> +}
>>> +
>>
>> Please could we reuse the function in coresight-platform.c ?
>> i.e, move it to a generic location and share it, rather than
>> duplicating it ?
>>
>>
>>> +static int acpi_dev_get_cpu(struct acpi_device *adev)
>>> +{
>>> + acpi_handle cpu_handle;
>>> + acpi_status status;
>>> + int cpu;
>>> +
>>> + status = acpi_get_parent(adev->handle, &cpu_handle);
>>> + if (ACPI_FAILURE(status))
>>> + return -1;
>>> + cpu = acpi_handle_to_cpuid(cpu_handle);
>>> + if (cpu >= nr_cpu_ids)
>>> + return -1;
>>> + return cpu;
>>> +}
>>> +
>>> static int amba_handler_attach(struct acpi_device *adev,
>>> const struct acpi_device_id *id)
>>> {
>>> @@ -54,11 +84,17 @@ static int amba_handler_attach(struct acpi_device
>>> *adev,
>>> bool address_found = false;
>>> int irq_no = 0;
>>> int ret;
>>> + int cpu;
>>> /* If the ACPI node already has a physical device attached,
>>> skip it. */
>>> if (adev->physical_node_count)
>>> return 0;
>>> + /* If the cpu associated with the device is not online, skip it. */
>>> + cpu = acpi_dev_get_cpu(adev);
>>> + if (cpu >= 0 && !cpu_online(cpu))
>>> + return 0;
>>> +
>>
>> Except for the comment above, the patch looks good to me.
>>
>> Suzuki
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> [email protected]
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Hi Suzuki,
在 2022/1/28 19:00, Suzuki K Poulose 写道:
> Hi Xiang
>
> On 07/01/2022 08:41, chenxiang wrote:
>> From: Xiang Chen <[email protected]>
>>
>> If not up all the cpus with command line "maxcpus=x", system will be
>> blocked.
>> We find that some amba devices such as ETM devices, are associated with
>> special cpus, and if the cpu is not up, the register of associated
>> device
>> is not allowed to access. BIOS reports all the ETM device nodes and a
>> amba device is created for every ETM device, so even if one cpu is
>> not up,
>> the amba device will still be created for the associated device, and
>> also
>> the register of device (pid and cid) will be accessed when adding amba
>> device which will cause the issue.
>> To fix it, skip creating amba device if it is associated with a cpu
>> which
>> is not online.
>
> I understand the issue. We do not have an issue at least on DT based
> platforms with a similar environment (Juno). The key is the power
> management for the components.
>
> There are two separate issues at play here :
>
> 1) Power management with ACPI. I believe there is a solution in progress
> to address this.
>
> 2) The ETM is in the same power domain as that of the CPU and normal
> device power management may not work without the CPU being online.
>
> 3) Additionally we have other issue of supporting system instructions
> with ACPI, which do not appear on the AMBA bus.
>
> Considering all of these, the ideal solution is to :
>
> 1) Implement power management for ACPI, which is anyway in progress
> (at least for SCMI based systems)
> 2) Move the ETM driver away from the AMBA framework. That would make
> the CPU online problem and the (3) above easier to solve.
> Anshuman is going to look into this.
>
> In the meantime, we could have this temporary fix in and solve it
> forever by moving away from the AMBA.
Ok, thanks.
Looking forward to the ideal soluation and i am glad to test it after
implemented.
>
>>
>> Signed-off-by: Xiang Chen <[email protected]>
>> ---
>> drivers/acpi/acpi_amba.c | 36 ++++++++++++++++++++++++++++++++++++
>> 1 file changed, 36 insertions(+)
>>
>> diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
>> index ab8a4e0191b1..2369198f734b 100644
>> --- a/drivers/acpi/acpi_amba.c
>> +++ b/drivers/acpi/acpi_amba.c
>> @@ -16,6 +16,7 @@
>> #include <linux/ioport.h>
>> #include <linux/kernel.h>
>> #include <linux/module.h>
>> +#include <acpi/processor.h>
>> #include "internal.h"
>> @@ -45,6 +46,35 @@ static void amba_register_dummy_clk(void)
>> clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
>> }
>> +static int acpi_handle_to_cpuid(acpi_handle handle)
>> +{
>> + int cpu = -1;
>> + struct acpi_processor *pr;
>> +
>> + for_each_possible_cpu(cpu) {
>> + pr = per_cpu(processors, cpu);
>> + if (pr && pr->handle == handle)
>> + break;
>> + }
>> +
>> + return cpu;
>> +}
>> +
>
> Please could we reuse the function in coresight-platform.c ?
> i.e, move it to a generic location and share it, rather than
> duplicating it ?
>
>
>> +static int acpi_dev_get_cpu(struct acpi_device *adev)
>> +{
>> + acpi_handle cpu_handle;
>> + acpi_status status;
>> + int cpu;
>> +
>> + status = acpi_get_parent(adev->handle, &cpu_handle);
>> + if (ACPI_FAILURE(status))
>> + return -1;
>> + cpu = acpi_handle_to_cpuid(cpu_handle);
>> + if (cpu >= nr_cpu_ids)
>> + return -1;
>> + return cpu;
>> +}
>> +
>> static int amba_handler_attach(struct acpi_device *adev,
>> const struct acpi_device_id *id)
>> {
>> @@ -54,11 +84,17 @@ static int amba_handler_attach(struct acpi_device
>> *adev,
>> bool address_found = false;
>> int irq_no = 0;
>> int ret;
>> + int cpu;
>> /* If the ACPI node already has a physical device attached,
>> skip it. */
>> if (adev->physical_node_count)
>> return 0;
>> + /* If the cpu associated with the device is not online, skip
>> it. */
>> + cpu = acpi_dev_get_cpu(adev);
>> + if (cpu >= 0 && !cpu_online(cpu))
>> + return 0;
>> +
>
> Except for the comment above, the patch looks good to me.
>
> Suzuki
> .
>