2015-07-13 13:55:10

by Lukasz Anaczkowski

[permalink] [raw]
Subject: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

This patch is based on work of "Yinghai Lu <[email protected]>"
previously published at https://lkml.org/lkml/2013/1/21/563.

In case when BIOS is populating MADT with both x2apic and local apic
entries (as per ACPI spec), kernel builds its processor table
in the following order: BSP, X2APIC, local APIC, resulting in
processors on the same core not being separated by core count.

This patch fixes this behavior and resulting assignment is
correct.

Signed-off-by: Lukasz Anaczkowski <[email protected]>
---
arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
drivers/acpi/numa.c | 28 ++++++++++++-----
drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
drivers/irqchip/irq-gic.c | 15 ++++++---
include/linux/acpi.h | 13 ++++++--
5 files changed, 111 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e49ee24..1bb79d5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -981,6 +981,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
int x2count = 0;
+ struct acpi_subtable_proc madt_proc[2];

if (!cpu_has_apic)
return -ENODEV;
@@ -1004,10 +1005,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);

if (!count) {
- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_LOCAL_APIC);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_LOCAL_APIC);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+ madt_proc[0].handler = acpi_parse_lapic;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+ madt_proc[1].handler = acpi_parse_x2apic;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}

- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
- acpi_parse_lapic_nmi, 0);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
+ madt_proc[0].handler = acpi_parse_lapic_nmi;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
+ madt_proc[1].handler = acpi_parse_x2apic_nmi;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), 0);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index acaa3b4..2bdff0c 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -314,9 +314,15 @@ static int __init
acpi_table_parse_srat(enum acpi_srat_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_SRAT,
- sizeof(struct acpi_table_srat), id,
- handler, max_entries);
+ struct acpi_subtable_proc srat_proc;
+
+ memset(&srat_proc, 0, sizeof(srat_proc));
+ srat_proc.id = id;
+ srat_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ &srat_proc, 1, max_entries);
}

int __init acpi_numa_init(void)
@@ -331,10 +337,18 @@ int __init acpi_numa_init(void)

/* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
- acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
- acpi_parse_x2apic_affinity, 0);
- acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
- acpi_parse_processor_affinity, 0);
+ struct acpi_subtable_proc srat_proc[2];
+
+ memset(srat_proc, 0, sizeof(srat_proc));
+ srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+ srat_proc[0].handler = acpi_parse_processor_affinity;
+ srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+ srat_proc[1].handler = acpi_parse_x2apic_affinity;
+
+ acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ srat_proc, ARRAY_SIZE(srat_proc), 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2e19189..1217e41 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -216,25 +216,27 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)

int __init
acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries)
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries)
{
struct acpi_subtable_header *entry;
int count = 0;
unsigned long table_end;
+ int i;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
-
- if (!table_size)
+ }
+ if (!table_size) {
+ proc[0].count = -EINVAL;
return -EINVAL;
+ }

if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

@@ -247,12 +249,17 @@ acpi_parse_entries(char *id, unsigned long table_size,

while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
- if (entry->type == entry_id
- && (!max_entries || count < max_entries)) {
- if (handler(entry, table_end))
+ for (i = 0; i < proc_num; i++) {
+ if (entry->type != proc[i].id)
+ continue;
+ if (max_entries && count++ >= max_entries)
+ continue;
+ if (proc[i].handler(entry, table_end)) {
+ proc[i].count = -EINVAL;
return -EINVAL;
-
- count++;
+ }
+ proc[i].count++;
+ break;
}

/*
@@ -260,7 +267,11 @@ acpi_parse_entries(char *id, unsigned long table_size,
* infinite loop.
*/
if (entry->length == 0) {
- pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
+ pr_err("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] Invalid zero length\n");
+ proc[0].count = -EINVAL;
return -EINVAL;
}

@@ -269,18 +280,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
}

if (max_entries && count > max_entries) {
- pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
- id, entry_id, count - max_entries, count);
+ pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] ignored %i entries of %i found\n",
+ count-max_entries, count);
}

return count;
}

int __init
-acpi_table_parse_entries(char *id,
+acpi_table_parse_entries_array(char *id,
unsigned long table_size,
- int entry_id,
- acpi_tbl_entry_handler handler,
+ struct acpi_subtable_proc *proc, int proc_num,
unsigned int max_entries)
{
struct acpi_table_header *table_header = NULL;
@@ -288,11 +301,10 @@ acpi_table_parse_entries(char *id,
int count;
u32 instance = 0;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
+ }

if (!strncmp(id, ACPI_SIG_MADT, 4))
instance = acpi_apic_instance;
@@ -300,11 +312,12 @@ acpi_table_parse_entries(char *id,
acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

- count = acpi_parse_entries(id, table_size, handler, table_header,
- entry_id, max_entries);
+ count = acpi_parse_entries(id, table_size, table_header,
+ proc, proc_num, max_entries);

early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return count;
@@ -314,9 +327,15 @@ int __init
acpi_table_parse_madt(enum acpi_madt_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
+ struct acpi_subtable_proc madt_proc;
+
+ memset(&madt_proc, 0, sizeof(madt_proc));
+ madt_proc.id = id;
+ madt_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ &madt_proc, 1, max_entries);
}

/**
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4dd8826..d004a32 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
{
void __iomem *cpu_base, *dist_base;
int count;
+ struct acpi_subtable_proc gic_proc;
+
+ memset(gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
+ gic_proc.handler = gic_acpi_parse_madt_cpu;

/* Collect CPU base addresses */
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_cpu, table,
- ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+ table, gic_proc, 0);
if (count <= 0) {
pr_err("No valid GICC entries exist\n");
return -EINVAL;
@@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
* Find distributor base address. We expect one distributor entry since
* ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
*/
+ memset(gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
+ gic_proc.handler = gic_acpi_parse_madt_distributor;
+
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_distributor, table,
- ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+ table, gic_proc, 0);
if (count <= 0) {
pr_err("No valid GICD entries exist\n");
return -EINVAL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d2445fa..59b17e8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))

+struct acpi_subtable_proc {
+ int id;
+ acpi_tbl_entry_handler handler;
+ int count;
+};
+
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
void __acpi_unmap_table(char *map, unsigned long size);
int early_acpi_boot_init(void);
@@ -145,10 +151,13 @@ int acpi_numa_init (void);

int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
+int acpi_table_parse_entries_array(char *id, unsigned long table_size,
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries);
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_table_parse_entries(char *id, unsigned long table_size,
int entry_id,
acpi_tbl_entry_handler handler,
--
1.8.3.1

--------------------------------------------------------------------

Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek
przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by
others is strictly prohibited.


2015-07-13 15:22:22

by Hanjun Guo

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

Hi Lukasz,

On 07/13/2015 09:54 PM, Lukasz Anaczkowski wrote:
> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563.
>
> In case when BIOS is populating MADT wiht both x2apic and local apic
> entries (as per ACPI spec), kernel builds it's processor table
> in the following order: BSP, X2APIC, local APIC, resulting in
> processors on the same core are not separated by core count.

I'm confused here, I can't figure out where the problem is, could you
explain it in detail?

For me, logical CPU IDs allocated by the OS will have mappings to physical
CPU IDs, so what you are describing seems like a CPU topology problem to me.

>
> This patch fixes this behavior and resulting assignment is
> correct.
>
> Signed-off-by: Lukasz Anaczkowski <[email protected]>
> ---
> arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
> drivers/acpi/numa.c | 28 ++++++++++++-----
> drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
> drivers/irqchip/irq-gic.c | 15 ++++++---

Anyway, you also missed the ACPI-based SMP boot code for ARM64, which will
cause a compile error on ARM64.

Thanks
Hanjun

2015-07-14 08:06:59

by Lukasz Anaczkowski

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

On 07/13/2015 23:22 PM, Hanjun Guo wrote:
>> In case when BIOS is populating MADT wiht both x2apic and local apic
>> entries (as per ACPI spec), kernel builds it's processor table
>> in the following order: BSP, X2APIC, local APIC, resulting in
>> processors on the same core are not separated by core count.
>
> I'm confused here, I can't figure out where is the problem, could you
> explain it in detail?
>
> For me, logical CPU IDs allocated by OS will have mappings to physical
> CPU IDs, what you are saying seems CPU topology problem to me.

Sorry for confusion, I'll send another patch with updated commit message
where I'll try to put some more details and explanation. Hope that one will
be less confusing.

>> arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
>> drivers/acpi/numa.c | 28 ++++++++++++-----
>> drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
>> drivers/irqchip/irq-gic.c | 15 ++++++---
>
> Anyway, You also miss the SMP boot for ARM64 in ACPI way, which will
> cause compile error on ARM64.

The only symbol that's changed is acpi_parse_entries and I don't see
any calls to it in ARM64. I also tried compiling arm64/defconfig
and it compiles fine. Could you point me to the place where it might be failing?

Thanks,
Lukasz
--------------------------------------------------------------------

Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek
przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by
others is strictly prohibited.

2015-07-14 08:07:27

by Lukasz Anaczkowski

[permalink] [raw]
Subject: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

This patch is based on work of "Yinghai Lu <[email protected]>"
previously published at https://lkml.org/lkml/2013/1/21/563.

In case when BIOS is populating MADT with both x2apic and local apic
entries (as per ACPI spec), e.g. for Xeon Phi Knights Landing,
kernel builds its processor table in the following order:
BSP, X2APIC, local APIC, resulting in processors on the same core
not being separated by core count, i.e.

Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
0 0 ( 0 [0000]), 97 ( 1 [0001]), 145 ( 2 [0002]), 193 ( 3 [0003])
1 50 ( 4 [0004]), 98 ( 5 [0005]), 146 ( 6 [0006]), 194 ( 7 [0007])
2 51 ( 16 [0010]), 99 ( 17 [0011]), 147 ( 18 [0012]), 195 ( 19 [0013])
3 52 ( 20 [0014]), 100 ( 21 [0015]), 148 ( 22 [0016]), 196 ( 23 [0017])
4 53 ( 24 [0018]), 101 ( 25 [0019]), 149 ( 26 [001a]), 197 ( 27 [001b])
5 54 ( 28 [001c]), 102 ( 29 [001d]), 150 ( 30 [001e]), 198 ( 31 [001f])
...

Please note how the LCpu numbers are mixed across physical cores (Core).

This patch fixes this behavior and resulting assignment is
consistent with other Xeon processors, i.e.

Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
0 0 ( 0 [0000]), 72 ( 1 [0001]), 144 ( 2 [0002]), 216 ( 3 [0003])
1 1 ( 4 [0004]), 73 ( 5 [0005]), 145 ( 6 [0006]), 217 ( 7 [0007])
2 2 ( 8 [0008]), 74 ( 9 [0009]), 146 ( 10 [000a]), 218 ( 11 [000b])
3 3 ( 12 [000c]), 75 ( 13 [000d]), 147 ( 14 [000e]), 219 ( 15 [000f])
4 4 ( 16 [0010]), 76 ( 17 [0011]), 148 ( 18 [0012]), 220 ( 19 [0013])
5 5 ( 20 [0014]), 77 ( 21 [0015]), 149 ( 22 [0016]), 221 ( 23 [0017])
...

Signed-off-by: Lukasz Anaczkowski <[email protected]>
---
arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
drivers/acpi/numa.c | 28 ++++++++++++-----
drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
drivers/irqchip/irq-gic.c | 15 ++++++---
include/linux/acpi.h | 13 ++++++--
5 files changed, 111 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e49ee24..1bb79d5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -981,6 +981,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
int x2count = 0;
+ struct acpi_subtable_proc madt_proc[2];

if (!cpu_has_apic)
return -ENODEV;
@@ -1004,10 +1005,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);

if (!count) {
- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_LOCAL_APIC);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_LOCAL_APIC);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+ madt_proc[0].handler = acpi_parse_lapic;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+ madt_proc[1].handler = acpi_parse_x2apic;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}

- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
- acpi_parse_lapic_nmi, 0);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
+ madt_proc[0].handler = acpi_parse_lapic_nmi;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
+ madt_proc[1].handler = acpi_parse_x2apic_nmi;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), 0);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index acaa3b4..2bdff0c 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -314,9 +314,15 @@ static int __init
acpi_table_parse_srat(enum acpi_srat_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_SRAT,
- sizeof(struct acpi_table_srat), id,
- handler, max_entries);
+ struct acpi_subtable_proc srat_proc;
+
+ memset(&srat_proc, 0, sizeof(srat_proc));
+ srat_proc.id = id;
+ srat_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ &srat_proc, 1, max_entries);
}

int __init acpi_numa_init(void)
@@ -331,10 +337,18 @@ int __init acpi_numa_init(void)

/* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
- acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
- acpi_parse_x2apic_affinity, 0);
- acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
- acpi_parse_processor_affinity, 0);
+ struct acpi_subtable_proc srat_proc[2];
+
+ memset(srat_proc, 0, sizeof(srat_proc));
+ srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+ srat_proc[0].handler = acpi_parse_processor_affinity;
+ srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+ srat_proc[1].handler = acpi_parse_x2apic_affinity;
+
+ acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ srat_proc, ARRAY_SIZE(srat_proc), 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2e19189..1217e41 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -216,25 +216,27 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)

int __init
acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries)
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries)
{
struct acpi_subtable_header *entry;
int count = 0;
unsigned long table_end;
+ int i;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
-
- if (!table_size)
+ }
+ if (!table_size) {
+ proc[0].count = -EINVAL;
return -EINVAL;
+ }

if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

@@ -247,12 +249,17 @@ acpi_parse_entries(char *id, unsigned long table_size,

while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
- if (entry->type == entry_id
- && (!max_entries || count < max_entries)) {
- if (handler(entry, table_end))
+ for (i = 0; i < proc_num; i++) {
+ if (entry->type != proc[i].id)
+ continue;
+ if (max_entries && count++ >= max_entries)
+ continue;
+ if (proc[i].handler(entry, table_end)) {
+ proc[i].count = -EINVAL;
return -EINVAL;
-
- count++;
+ }
+ proc[i].count++;
+ break;
}

/*
@@ -260,7 +267,11 @@ acpi_parse_entries(char *id, unsigned long table_size,
* infinite loop.
*/
if (entry->length == 0) {
- pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
+ pr_err("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] Invalid zero length\n");
+ proc[0].count = -EINVAL;
return -EINVAL;
}

@@ -269,18 +280,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
}

if (max_entries && count > max_entries) {
- pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
- id, entry_id, count - max_entries, count);
+ pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] ignored %i entries of %i found\n",
+ count-max_entries, count);
}

return count;
}

int __init
-acpi_table_parse_entries(char *id,
+acpi_table_parse_entries_array(char *id,
unsigned long table_size,
- int entry_id,
- acpi_tbl_entry_handler handler,
+ struct acpi_subtable_proc *proc, int proc_num,
unsigned int max_entries)
{
struct acpi_table_header *table_header = NULL;
@@ -288,11 +301,10 @@ acpi_table_parse_entries(char *id,
int count;
u32 instance = 0;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
+ }

if (!strncmp(id, ACPI_SIG_MADT, 4))
instance = acpi_apic_instance;
@@ -300,11 +312,12 @@ acpi_table_parse_entries(char *id,
acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

- count = acpi_parse_entries(id, table_size, handler, table_header,
- entry_id, max_entries);
+ count = acpi_parse_entries(id, table_size, table_header,
+ proc, proc_num, max_entries);

early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return count;
@@ -314,9 +327,15 @@ int __init
acpi_table_parse_madt(enum acpi_madt_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
+ struct acpi_subtable_proc madt_proc;
+
+ memset(&madt_proc, 0, sizeof(madt_proc));
+ madt_proc.id = id;
+ madt_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ &madt_proc, 1, max_entries);
}

/**
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4dd8826..d004a32 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
{
void __iomem *cpu_base, *dist_base;
int count;
+ struct acpi_subtable_proc gic_proc;
+
+ memset(gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
+ gic_proc.handler = gic_acpi_parse_madt_cpu;

/* Collect CPU base addresses */
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_cpu, table,
- ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+ table, gic_proc, 0);
if (count <= 0) {
pr_err("No valid GICC entries exist\n");
return -EINVAL;
@@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
* Find distributor base address. We expect one distributor entry since
* ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
*/
+ memset(gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
+ gic_proc.handler = gic_acpi_parse_madt_distributor;
+
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_distributor, table,
- ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+ table, gic_proc, 0);
if (count <= 0) {
pr_err("No valid GICD entries exist\n");
return -EINVAL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d2445fa..59b17e8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))

+struct acpi_subtable_proc {
+ int id;
+ acpi_tbl_entry_handler handler;
+ int count;
+};
+
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
void __acpi_unmap_table(char *map, unsigned long size);
int early_acpi_boot_init(void);
@@ -145,10 +151,13 @@ int acpi_numa_init (void);

int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
+int acpi_table_parse_entries_array(char *id, unsigned long table_size,
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries);
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_table_parse_entries(char *id, unsigned long table_size,
int entry_id,
acpi_tbl_entry_handler handler,
--
1.8.3.1

--------------------------------------------------------------------

Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek
przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by
others is strictly prohibited.

2015-07-14 08:26:35

by Jiang Liu

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

On 2015/7/14 16:06, Lukasz Anaczkowski wrote:
> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563.
>
> In case when BIOS is populating MADT wiht both x2apic and local apic
> entries (as per ACPI spec), e.g. for Xeon Phi Knights Landing,
> kernel builds it's processor table in the following order:
> BSP, X2APIC, local APIC, resulting in processors on the same core
> are not separated by core count, i.e.
>
> Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
> 0 0 ( 0 [0000]), 97 ( 1 [0001]), 145 ( 2 [0002]), 193 ( 3 [0003])
> 1 50 ( 4 [0004]), 98 ( 5 [0005]), 146 ( 6 [0006]), 194 ( 7 [0007])
> 2 51 ( 16 [0010]), 99 ( 17 [0011]), 147 ( 18 [0012]), 195 ( 19 [0013])
> 3 52 ( 20 [0014]), 100 ( 21 [0015]), 148 ( 22 [0016]), 196 ( 23 [0017])
> 4 53 ( 24 [0018]), 101 ( 25 [0019]), 149 ( 26 [001a]), 197 ( 27 [001b])
> 5 54 ( 28 [001c]), 102 ( 29 [001d]), 150 ( 30 [001e]), 198 ( 31 [001f])
> ...
>
> Please note, how LCpu are mixed for physical cores (Core).
>
> This patch fixes this behavior and resulting assignment is
> consistent with other Xeon processors, i.e.
>
> Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
> 0 0 ( 0 [0000]), 72 ( 1 [0001]), 144 ( 2 [0002]), 216 ( 3 [0003])
> 1 1 ( 4 [0004]), 73 ( 5 [0005]), 145 ( 6 [0006]), 217 ( 7 [0007])
> 2 2 ( 8 [0008]), 74 ( 9 [0009]), 146 ( 10 [000a]), 218 ( 11 [000b])
> 3 3 ( 12 [000c]), 75 ( 13 [000d]), 147 ( 14 [000e]), 219 ( 15 [000f])
> 4 4 ( 16 [0010]), 76 ( 17 [0011]), 148 ( 18 [0012]), 220 ( 19 [0013])
> 5 5 ( 20 [0014]), 77 ( 21 [0015]), 149 ( 22 [0016]), 221 ( 23 [0017])
> ...
>
> Signed-off-by: Lukasz Anaczkowski <[email protected]>
Hi Lukasz,
I have some concerns here about the "maxcpus" and "nox2apic" kernel
parameters. If "maxcpus=72 nox2apic" is specified, the user may get
fewer than 72 CPUs with your patch applied. The original code will try
all xapic CPUs before trying x2apic CPUs, so "maxcpus" doesn't
conflict with "nox2apic".
Thanks!
Gerry

2015-07-14 11:17:45

by Lukasz Anaczkowski

[permalink] [raw]
Subject: RE: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

> I have some concerns here about "maxcpus" and "nox2apic" kernel parameters. Say "maxcpus=72 nox2apic" is specified, user may get less than 72 CPUs with you patch applied. Original code will try to only all xapic CPUs before trying x2apic CPUs, so "maxcpus" doesn't conflict with "nox2apic".

Hi Gerry,

'nox2apic' is causing kernel panics with and w/o my patch.
Here's the stack trace:

#0 panic (fmt=0xffffffff818043c8 "timer doesn't work through Interrupt-remapped IO-APIC") at kernel/panic.c:72
#1 0xffffffff813df48c in panic_if_irq_remap (msg=<optimized out>) at drivers/iommu/irq_remapping.c:152
#2 0xffffffff81b5a705 in check_timer () at arch/x86/kernel/apic/io_apic.c:2124
#3 0xffffffff81b5afa3 in setup_IO_APIC () at arch/x86/kernel/apic/io_apic.c:2278
#4 0xffffffff81b59655 in apic_bsp_setup (upmode=<optimized out>) at arch/x86/kernel/apic/apic.c:2184
#5 0xffffffff81b57370 in native_smp_prepare_cpus (max_cpus=<optimized out>) at arch/x86/kernel/smpboot.c:1201
#6 0xffffffff81b49182 in smp_prepare_cpus (max_cpus=<optimized out>) at ./arch/x86/include/asm/smp.h:99
#7 kernel_init_freeable () at init/main.c:999
#8 0xffffffff8155c64e in kernel_init (unused=<optimized out>) at init/main.c:937
#9 0xffffffff8156f51f in ret_from_fork () at arch/x86/entry/entry_64.S:526
#10 0x0000000000000000 in ?? ()

On HSW i7-4770, I'm getting 8 CPUs booted, regardless of the 'nox2apic' setting w/ my patch.

Cheers,
Lukasz
--------------------------------------------------------------------

Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek
przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by
others is strictly prohibited.

2015-07-14 14:24:25

by Hanjun Guo

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

On 07/14/2015 04:06 PM, Lukasz Anaczkowski wrote:
> On 07/13/2015 23:22 PM, Hanjun Guo wrote:
>>> In the case when the BIOS populates the MADT with both x2apic and local apic
>>> entries (as per the ACPI spec), the kernel builds its processor table
>>> in the following order: BSP, X2APIC, local APIC, with the result that
>>> processors on the same core are not separated by core count.
>>
>> I'm confused here, I can't figure out where is the problem, could you
>> explain it in detail?
>>
>> For me, logical CPU IDs allocated by OS will have mappings to physical
>> CPU IDs, what you are saying seems CPU topology problem to me.
>
> Sorry for confusion, I'll send another patch with updated commit message
> where I'll try to put some more details and explanation. Hope that one will
> be less confusing.
>
>>> arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
>>> drivers/acpi/numa.c | 28 ++++++++++++-----
>>> drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
>>> drivers/irqchip/irq-gic.c | 15 ++++++---
>>
>> Anyway, You also miss the SMP boot for ARM64 in ACPI way, which will
>> cause compile error on ARM64.
>
> The only symbol that's changed is acpi_parse_entries and I don't see
> any calls to it in ARM64. I also tried compilation of arm64/defconfig
> and it compiles fine. Could you point me to the place where it might be failing?

Sorry, I misread your patch and thought you had also modified
acpi_table_parse_madt(), but it turns out that you kept it as before.
Sorry for the noise.

Thanks
Hanjun

2015-07-21 08:27:49

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

On Tue, 14 Jul 2015, Lukasz Anaczkowski wrote:
> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563.
>
> In the case when the BIOS populates the MADT with both x2apic and local apic
> entries (as per the ACPI spec), e.g. for Xeon Phi Knights Landing,
> the kernel builds its processor table in the following order:
> BSP, X2APIC, local APIC, with the result that processors on the same core
> are not separated by core count, i.e.

You fail to explain WHY this is the wrong ordering.

> Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
> 0 0 ( 0 [0000]), 97 ( 1 [0001]), 145 ( 2 [0002]), 193 ( 3 [0003])
> 1 50 ( 4 [0004]), 98 ( 5 [0005]), 146 ( 6 [0006]), 194 ( 7 [0007])
> 2 51 ( 16 [0010]), 99 ( 17 [0011]), 147 ( 18 [0012]), 195 ( 19 [0013])
> 3 52 ( 20 [0014]), 100 ( 21 [0015]), 148 ( 22 [0016]), 196 ( 23 [0017])
> 4 53 ( 24 [0018]), 101 ( 25 [0019]), 149 ( 26 [001a]), 197 ( 27 [001b])
> 5 54 ( 28 [001c]), 102 ( 29 [001d]), 150 ( 30 [001e]), 198 ( 31 [001f])
> ...
>
> Please note, how LCpu are mixed for physical cores (Core).
>
> This patch fixes this behavior and resulting assignment is
> consistent with other Xeon processors, i.e.

You fail to explain HOW you fix it. It's completely non-obvious
why the conversion to a parse array makes it work.

> if (!count) {
> - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
> - acpi_parse_x2apic, MAX_LOCAL_APIC);
> - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
> - acpi_parse_lapic, MAX_LOCAL_APIC);
> + memset(madt_proc, 0, sizeof(madt_proc));
> + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
> + madt_proc[0].handler = acpi_parse_lapic;
> + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
> + madt_proc[1].handler = acpi_parse_x2apic;

Here you reverse the parse order.

> + acpi_table_parse_entries_array(ACPI_SIG_MADT,
> + sizeof(struct acpi_table_madt),
> + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
> + count = madt_proc[0].count;
> + x2count = madt_proc[1].count;
> }
> if (!count && !x2count) {
> printk(KERN_ERR PREFIX "No LAPIC entries present\n");
> @@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
> return count;
> }
>
> - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
> - acpi_parse_x2apic_nmi, 0);
> - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
> - acpi_parse_lapic_nmi, 0);
> + memset(madt_proc, 0, sizeof(madt_proc));
> + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
> + madt_proc[0].handler = acpi_parse_lapic_nmi;
> + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
> + madt_proc[1].handler = acpi_parse_x2apic_nmi;

Ditto

> int __init acpi_numa_init(void)
> @@ -331,10 +337,18 @@ int __init acpi_numa_init(void)
>
> /* SRAT: Static Resource Affinity Table */
> if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
> - acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
> - acpi_parse_x2apic_affinity, 0);
> - acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
> - acpi_parse_processor_affinity, 0);
> + struct acpi_subtable_proc srat_proc[2];
> +
> + memset(srat_proc, 0, sizeof(srat_proc));
> + srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
> + srat_proc[0].handler = acpi_parse_processor_affinity;
> + srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
> + srat_proc[1].handler = acpi_parse_x2apic_affinity;

Once more.

Please add a proper explanation of why the array parser is required and
why the parse order needs to be reversed.

Thanks,

tglx

2015-07-30 17:43:50

by Lukasz Anaczkowski

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle xapic/x2apic entries in MADT

> Please add proper explanations why the array parser is required and
> why the parse order needs to be reversed.

Thanks for comments.
I'm sending the patch with a reworked commit message. I hope that at least
some of the doubts are clearer now.

Cheers,
Lukasz

2015-07-30 17:43:53

by Lukasz Anaczkowski

[permalink] [raw]
Subject: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

From the ACPI spec:
"Logical processors with APIC ID values less than 0xFF
(whether in XAPIC or X2APIC mode) must use the Processor LAPIC
structure [...]. Logical processors with APIC ID values 0xFF and
greater must use the Processor Local x2APIC structure."

Because of above, BIOS is first enumerating cores with HT with
LAPIC values (<0xFF) and then rest with X2APIC values (>=0xFF).

With the current kernel code, where enumeration is in the order:
BSP, X2APIC, LAPIC
on a machine with more than 255 CPUs (each core with 4 HT),
X2APIC IDs first get the low logical CPU IDs (1..x) and then LAPIC IDs
get the higher logical CPU IDs (50..y), as in the example below:

Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
0 0 0000 97 0001 145 0002 193 0003
1 50 0004 98 0005 146 0006 194 0007
2 51 0010 99 0011 147 0012 195 0013
3 52 0014 100 0015 148 0016 196 0017
4 53 0018 101 0019 149 001a 197 001b
5 54 001c 102 001d 150 001e 198 001f
...
62 95 00f8 143 00f9 191 00fa 239 00fb
63 37 00ff 96 00fc 144 00fd 192 00fe
64 1 0100 13 0101 25 0102 38 0103
65 2 0104 14 0105 26 0106 39 0107
...

(Core - physical core, LCpu - logical CPU, ApicId - ID assigned
by BIOS).

This is wrong for the following reasons:
() it's hard to predict how cores and threads will be enumerated
() when it's hard to predict, s/w threads cannot be properly affinitized
causing significant performance impact due to e.g. improper cache
sharing
() enumeration is inconsistent with how threads are enumerated on
other Intel Xeon processors

To fix this, the LAPIC and X2APIC entries of the MADT table need to be
handled in a single pass, in the order in which they appear in the table.
This patch therefore adds the acpi_subtable_proc structure, which stores
() ACPI table id
() handler that processes table
() counter of how many items have been processed
and passes an array of them to acpi_table_parse_entries_array().

Also, order in which MADT LAPIC/X2APIC handlers are passed is
reversed to achieve correct CPU enumeration.

In a scenario where someone boots the kernel with the options
'maxcpus=72 nox2apic', fewer cores may be booted as a result, since some
of the CPUs the kernel will try to use will have APIC ID >= 0xFF. In such
a case, one should not pass 'nox2apic'.

Disclaimer: code parsing MADT LAPIC/X2APIC has not been touched since 2009,
when X2APIC support was initially added. I do not know why MADT parsing
code was added in the reversed order in the first place.
I guess it didn't matter at that time since nobody cared about cores
with APIC IDs >= 0xFF, right?

This patch is based on work of "Yinghai Lu <[email protected]>"
previously published at https://lkml.org/lkml/2013/1/21/563,
thus putting Yinghai Lu as 'Signed-off-by', as well.

Signed-off-by: Yinghai Lu <[email protected]>
Signed-off-by: Lukasz Anaczkowski <[email protected]>
---
arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
drivers/acpi/numa.c | 28 ++++++++++++-----
drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
drivers/irqchip/irq-gic.c | 15 ++++++---
include/linux/acpi.h | 13 ++++++--
5 files changed, 111 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e49ee24..fb4a9d6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -981,6 +981,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
int x2count = 0;
+ struct acpi_subtable_proc madt_proc[2];

if (!cpu_has_apic)
return -ENODEV;
@@ -1004,10 +1005,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);

if (!count) {
- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_LOCAL_APIC);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_LOCAL_APIC);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+ madt_proc[0].handler = acpi_parse_lapic;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+ madt_proc[1].handler = acpi_parse_x2apic;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}

- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
- acpi_parse_lapic_nmi, 0);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
+ madt_proc[0].handler = acpi_parse_x2apic_nmi;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
+ madt_proc[1].handler = acpi_parse_lapic_nmi;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), 0);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index acaa3b4..a000195 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -314,9 +314,15 @@ static int __init
acpi_table_parse_srat(enum acpi_srat_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_SRAT,
- sizeof(struct acpi_table_srat), id,
- handler, max_entries);
+ struct acpi_subtable_proc srat_proc;
+
+ memset(&srat_proc, 0, sizeof(srat_proc));
+ srat_proc.id = id;
+ srat_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ &srat_proc, 1, max_entries);
}

int __init acpi_numa_init(void)
@@ -331,10 +337,18 @@ int __init acpi_numa_init(void)

/* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
- acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
- acpi_parse_x2apic_affinity, 0);
- acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
- acpi_parse_processor_affinity, 0);
+ struct acpi_subtable_proc srat_proc[2];
+
+ memset(srat_proc, 0, sizeof(srat_proc));
+ srat_proc[0].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+ srat_proc[0].handler = acpi_parse_x2apic_affinity;
+ srat_proc[1].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+ srat_proc[1].handler = acpi_parse_processor_affinity;
+
+ acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ srat_proc, ARRAY_SIZE(srat_proc), 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2e19189..1217e41 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -216,25 +216,27 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)

int __init
acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries)
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries)
{
struct acpi_subtable_header *entry;
int count = 0;
unsigned long table_end;
+ int i;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
-
- if (!table_size)
+ }
+ if (!table_size) {
+ proc[0].count = -EINVAL;
return -EINVAL;
+ }

if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

@@ -247,12 +249,17 @@ acpi_parse_entries(char *id, unsigned long table_size,

while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
- if (entry->type == entry_id
- && (!max_entries || count < max_entries)) {
- if (handler(entry, table_end))
+ for (i = 0; i < proc_num; i++) {
+ if (entry->type != proc[i].id)
+ continue;
+ if (max_entries && count++ >= max_entries)
+ continue;
+ if (proc[i].handler(entry, table_end)) {
+ proc[i].count = -EINVAL;
return -EINVAL;
-
- count++;
+ }
+ proc[i].count++;
+ break;
}

/*
@@ -260,7 +267,11 @@ acpi_parse_entries(char *id, unsigned long table_size,
* infinite loop.
*/
if (entry->length == 0) {
- pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
+ pr_err("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] Invalid zero length\n");
+ proc[0].count = -EINVAL;
return -EINVAL;
}

@@ -269,18 +280,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
}

if (max_entries && count > max_entries) {
- pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
- id, entry_id, count - max_entries, count);
+ pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] ignored %i entries of %i found\n",
+ count-max_entries, count);
}

return count;
}

int __init
-acpi_table_parse_entries(char *id,
+acpi_table_parse_entries_array(char *id,
unsigned long table_size,
- int entry_id,
- acpi_tbl_entry_handler handler,
+ struct acpi_subtable_proc *proc, int proc_num,
unsigned int max_entries)
{
struct acpi_table_header *table_header = NULL;
@@ -288,11 +301,10 @@ acpi_table_parse_entries(char *id,
int count;
u32 instance = 0;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
+ }

if (!strncmp(id, ACPI_SIG_MADT, 4))
instance = acpi_apic_instance;
@@ -300,11 +312,12 @@ acpi_table_parse_entries(char *id,
acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

- count = acpi_parse_entries(id, table_size, handler, table_header,
- entry_id, max_entries);
+ count = acpi_parse_entries(id, table_size, table_header,
+ proc, proc_num, max_entries);

early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return count;
@@ -314,9 +327,15 @@ int __init
acpi_table_parse_madt(enum acpi_madt_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
+ struct acpi_subtable_proc madt_proc;
+
+ memset(&madt_proc, 0, sizeof(madt_proc));
+ madt_proc.id = id;
+ madt_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ &madt_proc, 1, max_entries);
}

/**
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4dd8826..d004a32 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
{
void __iomem *cpu_base, *dist_base;
int count;
+ struct acpi_subtable_proc gic_proc;
+
+ memset(gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
+ gic_proc.handler = gic_acpi_parse_madt_cpu;

/* Collect CPU base addresses */
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_cpu, table,
- ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+ table, gic_proc, 0);
if (count <= 0) {
pr_err("No valid GICC entries exist\n");
return -EINVAL;
@@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
* Find distributor base address. We expect one distributor entry since
* ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
*/
+ memset(gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
+ gic_proc.handler = gic_acpi_parse_madt_distributor;
+
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_distributor, table,
- ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+ table, gic_proc, 0);
if (count <= 0) {
pr_err("No valid GICD entries exist\n");
return -EINVAL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d2445fa..59b17e8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))

+struct acpi_subtable_proc {
+ int id;
+ acpi_tbl_entry_handler handler;
+ int count;
+};
+
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
void __acpi_unmap_table(char *map, unsigned long size);
int early_acpi_boot_init(void);
@@ -145,10 +151,13 @@ int acpi_numa_init (void);

int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
+int acpi_table_parse_entries_array(char *id, unsigned long table_size,
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries);
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_table_parse_entries(char *id, unsigned long table_size,
int entry_id,
acpi_tbl_entry_handler handler,
--
1.8.3.1

2015-08-02 09:57:37

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

On Thu, 30 Jul 2015, Lukasz Anaczkowski wrote:
> Also, order in which MADT LAPIC/X2APIC handlers are passed is
> reversed to achieve correct CPU enumeration.

Ok. That is an understandable changelog and the patch itself looks
about correct, but I leave that judgement to the ACPI experts.

This also wants an ack/review from Marc for the GIC part.

Acked-by: Thomas Gleixner <[email protected]>

2015-08-02 12:40:55

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

On Thu, 30 Jul 2015 19:43:39 +0200
Lukasz Anaczkowski <[email protected]> wrote:

> From the ACPI spec:
> "Logical processors with APIC ID values less than 0xFF
> (whether in XAPIC or X2APIC mode) must use the Processor LAPIC
> structure [...]. Logical processors with APIC ID values 0xFF and
> greater must use the Processor Local x2APIC structure."
>
> Because of above, BIOS is first enumerating cores with HT with
> LAPIC values (<0xFF) and then rest with X2APIC values (>=0xFF).
>
> With current kernel code, where enumeration is in order:
> BSP, X2APIC, LAPIC
> enumeration on machine with more than 255 CPUs (each core with 4 HT)
> first X2APIC IDs get low logical CPU IDs (1..x) and then LAPIC IDs
> get higher logical CPU IDs (50..y), as in example below:
>
> Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
> 0 0 0000 97 0001 145 0002 193 0003
> 1 50 0004 98 0005 146 0006 194 0007
> 2 51 0010 99 0011 147 0012 195 0013
> 3 52 0014 100 0015 148 0016 196 0017
> 4 53 0018 101 0019 149 001a 197 001b
> 5 54 001c 102 001d 150 001e 198 001f
> ...
> 62 95 00f8 143 00f9 191 00fa 239 00fb
> 63 37 00ff 96 00fc 144 00fd 192 00fe
> 64 1 0100 13 0101 25 0102 38 0103
> 65 2 0104 14 0105 26 0106 39 0107
> ...
>
> (Core - physical core, LCpu - logical CPU, ApicId - ID assigned
> by BIOS).
>
> This is wrong for the following reasons:
> () it's hard to predict how cores and threads will be enumerated
> () when it's hard to predict, s/w threads cannot be properly affinitized
> causing significant performance impact due to e.g. improper cache
> sharing
> () enumeration is inconsistent with how threads are enumerated on
> other Intel Xeon processors
>
> To fix this, each LAPIC/X2APIC entry from MADT table needs to be
> handled at the same time when processing it, thus adding
> acpi_subtable_proc structure which stores
> () ACPI table id
> () handler that processes table
> () counter how many items has been processed
> and passing it to acpi_table_parse_entries().
>
> Also, order in which MADT LAPIC/X2APIC handlers are passed is
> reversed to achieve correct CPU enumeration.
>
> In scenario when someone boots kernel with options 'maxcpus=72 nox2apic',
> in result less cores may be booted, since some of the CPUs the kernel
> will try to use will have APIC ID >= 0xFF. In such case, one
> should not pass 'nox2apic'.
>
> Disclaimer: code parsing MADT LAPIC/X2APIC has not been touched since 2009,
> when X2APIC support was initially added. I do not know why MADT parsing
> code was added in the reversed order in the first place.
> I guess it didn't matter at that time since nobody cared about cores
> with APIC IDs >= 0xFF, right?
>
> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563,
> thus putting Yinghai Lu as 'Signed-off-by', as well.
>
> Signed-off-by: Yinghai Lu <[email protected]>
> Signed-off-by: Lukasz Anaczkowski <[email protected]>
> ---
> arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
> drivers/acpi/numa.c | 28 ++++++++++++-----
> drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
> drivers/irqchip/irq-gic.c | 15 ++++++---
> include/linux/acpi.h | 13 ++++++--
> 5 files changed, 111 insertions(+), 49 deletions(-)
>

Hi Lukasz,

[...]

> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index 4dd8826..d004a32 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
> {
> void __iomem *cpu_base, *dist_base;
> int count;
> + struct acpi_subtable_proc gic_proc;
> +
> + memset(gic_proc, 0, sizeof(gic_proc));

You haven't ever tried compiling this, have you?

> + gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
> + gic_proc.handler = gic_acpi_parse_madt_cpu;
>
> /* Collect CPU base addresses */
> count = acpi_parse_entries(ACPI_SIG_MADT,
> sizeof(struct acpi_table_madt),
> - gic_acpi_parse_madt_cpu, table,
> - ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
> + table, gic_proc, 0);

This doesn't match the prototype below.

> if (count <= 0) {
> pr_err("No valid GICC entries exist\n");
> return -EINVAL;
> @@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
> * Find distributor base address. We expect one distributor entry since
> * ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
> */
> + memset(gic_proc, 0, sizeof(gic_proc));
> + gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
> + gic_proc.handler = gic_acpi_parse_madt_distributor;
> +
> count = acpi_parse_entries(ACPI_SIG_MADT,
> sizeof(struct acpi_table_madt),
> - gic_acpi_parse_madt_distributor, table,
> - ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
> + table, gic_proc, 0);
> if (count <= 0) {
> pr_err("No valid GICD entries exist\n");
> return -EINVAL;
> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
> index d2445fa..59b17e8 100644
> --- a/include/linux/acpi.h
> +++ b/include/linux/acpi.h
> @@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
> (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
> ((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
>
> +struct acpi_subtable_proc {
> + int id;
> + acpi_tbl_entry_handler handler;
> + int count;
> +};
> +
> char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
> void __acpi_unmap_table(char *map, unsigned long size);
> int early_acpi_boot_init(void);
> @@ -145,10 +151,13 @@ int acpi_numa_init (void);
>
> int acpi_table_init (void);
> int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
> +int acpi_table_parse_entries_array(char *id, unsigned long table_size,
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries);
> int __init acpi_parse_entries(char *id, unsigned long table_size,
> - acpi_tbl_entry_handler handler,
> struct acpi_table_header *table_header,
> - int entry_id, unsigned int max_entries);
> + struct acpi_subtable_proc *proc, int proc_num,

Could you please check that it actually compiles when you enable ACPI
on arm64?

Thanks,

M.
--
Jazz is not dead. It just smells funny.

2015-08-03 18:26:29

by Lukasz Anaczkowski

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

Hi Marc,

> You haven't ever tried compiling this, have you?

I *thought* I tried, but obviously I did it wrong.
This time I made sure it compiles. Thanks for pointing
this out.

Sending v2 of the patch with fixed arm64 compilation.

Cheers,
Lukasz

2015-08-03 18:26:43

by Lukasz Anaczkowski

[permalink] [raw]
Subject: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

v2: Fixed ARM64 syntax error

From the ACPI spec:
"Logical processors with APIC ID values less than 0xFF
(whether in XAPIC or X2APIC mode) must use the Processor LAPIC
structure [...]. Logical processors with APIC ID values 0xFF and
greater must use the Processor Local x2APIC structure."

Because of above, BIOS is first enumerating cores with HT with
LAPIC values (<0xFF) and then rest with X2APIC values (>=0xFF).

With the current kernel code, where enumeration is in the order:
BSP, X2APIC, LAPIC
on a machine with more than 255 CPUs (each core with 4 HT),
X2APIC IDs first get the low logical CPU IDs (1..x) and then LAPIC IDs
get the higher logical CPU IDs (50..y), as in the example below:

Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
0 0 0000 97 0001 145 0002 193 0003
1 50 0004 98 0005 146 0006 194 0007
2 51 0010 99 0011 147 0012 195 0013
3 52 0014 100 0015 148 0016 196 0017
4 53 0018 101 0019 149 001a 197 001b
5 54 001c 102 001d 150 001e 198 001f
...
62 95 00f8 143 00f9 191 00fa 239 00fb
63 37 00ff 96 00fc 144 00fd 192 00fe
64 1 0100 13 0101 25 0102 38 0103
65 2 0104 14 0105 26 0106 39 0107
...

(Core - physical core, LCpu - logical CPU, ApicId - ID assigned
by BIOS).

This is wrong for the following reasons:
() it's hard to predict how cores and threads will be enumerated
() when it's hard to predict, s/w threads cannot be properly affinitized
causing significant performance impact due to e.g. improper cache
sharing
() enumeration is inconsistent with how threads are enumerated on
other Intel Xeon processors

To fix this, the LAPIC and X2APIC entries of the MADT table need to be
handled in a single pass, in the order in which they appear in the table.
This patch therefore adds the acpi_subtable_proc structure, which stores
() ACPI table id
() handler that processes table
() counter of how many items have been processed
and passes an array of them to acpi_table_parse_entries_array().

Also, order in which MADT LAPIC/X2APIC handlers are passed is
reversed to achieve correct CPU enumeration.

In a scenario where someone boots the kernel with the options
'maxcpus=72 nox2apic', fewer cores may be booted as a result, since some
of the CPUs the kernel will try to use will have APIC ID >= 0xFF. In such
a case, one should not pass 'nox2apic'.

Disclaimer: code parsing MADT LAPIC/X2APIC has not been touched since 2009,
when X2APIC support was initially added. I do not know why MADT parsing
code was added in the reversed order in the first place.
I guess it didn't matter at that time since nobody cared about cores
with APIC IDs >= 0xFF, right?

This patch is based on work of "Yinghai Lu <[email protected]>"
previously published at https://lkml.org/lkml/2013/1/21/563,
thus putting Yinghai Lu as 'Signed-off-by', as well.

Signed-off-by: Yinghai Lu <[email protected]>
Signed-off-by: Lukasz Anaczkowski <[email protected]>
---
arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
drivers/acpi/numa.c | 28 ++++++++++++-----
drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
drivers/irqchip/irq-gic.c | 15 ++++++---
include/linux/acpi.h | 13 ++++++--
5 files changed, 111 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e49ee24..fb4a9d6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -981,6 +981,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
int x2count = 0;
+ struct acpi_subtable_proc madt_proc[2];

if (!cpu_has_apic)
return -ENODEV;
@@ -1004,10 +1005,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);

if (!count) {
- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_LOCAL_APIC);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_LOCAL_APIC);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+ madt_proc[0].handler = acpi_parse_lapic;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+ madt_proc[1].handler = acpi_parse_x2apic;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}

- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
- acpi_parse_lapic_nmi, 0);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
+ madt_proc[0].handler = acpi_parse_x2apic_nmi;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
+ madt_proc[1].handler = acpi_parse_lapic_nmi;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), 0);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index acaa3b4..a000195 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -314,9 +314,15 @@ static int __init
acpi_table_parse_srat(enum acpi_srat_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_SRAT,
- sizeof(struct acpi_table_srat), id,
- handler, max_entries);
+ struct acpi_subtable_proc srat_proc;
+
+ memset(&srat_proc, 0, sizeof(srat_proc));
+ srat_proc.id = id;
+ srat_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ &srat_proc, 1, max_entries);
}

int __init acpi_numa_init(void)
@@ -331,10 +337,18 @@ int __init acpi_numa_init(void)

/* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
- acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
- acpi_parse_x2apic_affinity, 0);
- acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
- acpi_parse_processor_affinity, 0);
+ struct acpi_subtable_proc srat_proc[2];
+
+ memset(srat_proc, 0, sizeof(srat_proc));
+ srat_proc[0].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+ srat_proc[0].handler = acpi_parse_x2apic_affinity;
+ srat_proc[1].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+ srat_proc[1].handler = acpi_parse_processor_affinity;
+
+ acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ srat_proc, ARRAY_SIZE(srat_proc), 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2e19189..1217e41 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -216,25 +216,27 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)

int __init
acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries)
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries)
{
struct acpi_subtable_header *entry;
int count = 0;
unsigned long table_end;
+ int i;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
-
- if (!table_size)
+ }
+ if (!table_size) {
+ proc[0].count = -EINVAL;
return -EINVAL;
+ }

if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

@@ -247,12 +249,17 @@ acpi_parse_entries(char *id, unsigned long table_size,

while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
- if (entry->type == entry_id
- && (!max_entries || count < max_entries)) {
- if (handler(entry, table_end))
+ for (i = 0; i < proc_num; i++) {
+ if (entry->type != proc[i].id)
+ continue;
+ if (max_entries && count++ >= max_entries)
+ continue;
+ if (proc[i].handler(entry, table_end)) {
+ proc[i].count = -EINVAL;
return -EINVAL;
-
- count++;
+ }
+ proc[i].count++;
+ break;
}

/*
@@ -260,7 +267,11 @@ acpi_parse_entries(char *id, unsigned long table_size,
* infinite loop.
*/
if (entry->length == 0) {
- pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
+ pr_err("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] Invalid zero length\n");
+ proc[0].count = -EINVAL;
return -EINVAL;
}

@@ -269,18 +280,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
}

if (max_entries && count > max_entries) {
- pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
- id, entry_id, count - max_entries, count);
+ pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] ignored %i entries of %i found\n",
+ count-max_entries, count);
}

return count;
}

int __init
-acpi_table_parse_entries(char *id,
+acpi_table_parse_entries_array(char *id,
unsigned long table_size,
- int entry_id,
- acpi_tbl_entry_handler handler,
+ struct acpi_subtable_proc *proc, int proc_num,
unsigned int max_entries)
{
struct acpi_table_header *table_header = NULL;
@@ -288,11 +301,10 @@ acpi_table_parse_entries(char *id,
int count;
u32 instance = 0;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
+ }

if (!strncmp(id, ACPI_SIG_MADT, 4))
instance = acpi_apic_instance;
@@ -300,11 +312,12 @@ acpi_table_parse_entries(char *id,
acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

- count = acpi_parse_entries(id, table_size, handler, table_header,
- entry_id, max_entries);
+ count = acpi_parse_entries(id, table_size, table_header,
+ proc, proc_num, max_entries);

early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return count;
@@ -314,9 +327,15 @@ int __init
acpi_table_parse_madt(enum acpi_madt_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
+ struct acpi_subtable_proc madt_proc;
+
+ memset(&madt_proc, 0, sizeof(madt_proc));
+ madt_proc.id = id;
+ madt_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ &madt_proc, 1, max_entries);
}

/**
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4dd8826..d98b866 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
{
void __iomem *cpu_base, *dist_base;
int count;
+ struct acpi_subtable_proc gic_proc;
+
+ memset(&gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
+ gic_proc.handler = gic_acpi_parse_madt_cpu;

/* Collect CPU base addresses */
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_cpu, table,
- ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+ table, &gic_proc, 1, 0);
if (count <= 0) {
pr_err("No valid GICC entries exist\n");
return -EINVAL;
@@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
* Find distributor base address. We expect one distributor entry since
* ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
*/
+ memset(&gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
+ gic_proc.handler = gic_acpi_parse_madt_distributor;
+
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_distributor, table,
- ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+ table, &gic_proc, 1, 0);
if (count <= 0) {
pr_err("No valid GICD entries exist\n");
return -EINVAL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d2445fa..59b17e8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))

+struct acpi_subtable_proc {
+ int id;
+ acpi_tbl_entry_handler handler;
+ int count;
+};
+
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
void __acpi_unmap_table(char *map, unsigned long size);
int early_acpi_boot_init(void);
@@ -145,10 +151,13 @@ int acpi_numa_init (void);

int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
+int acpi_table_parse_entries_array(char *id, unsigned long table_size,
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries);
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_table_parse_entries(char *id, unsigned long table_size,
int entry_id,
acpi_tbl_entry_handler handler,
--
1.8.3.1

2015-08-26 07:05:28

by Lukasz Anaczkowski

[permalink] [raw]
Subject: RE: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

On Monday, August 3, 2015 8:26 PM
Lukasz Anaczkowski <[email protected]> wrote:

> v2: Fixed ARM64 syntax error

Hi Marc,

Does this patch look ok now?

Thanks,
Lukasz

2015-08-26 10:43:10

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

Hi Lukasz,

On 26/08/15 08:04, Anaczkowski, Lukasz wrote:
> On Monday, August 3, 2015 8:26 PM
> Lukasz Anaczkowski <[email protected]> wrote:
>
>> v2: Fixed ARM64 syntax error
>
> Hi Marc,
>
> Does this patch look ok now?

Contrary to popular belief, I do not read every email on LKML. Crazy,
I know. How about cc-ing me next time? Even your reply to my original
rant wasn't addressed to me (how did you manage that?)...

I'll have a look now.

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2015-08-26 11:34:10

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

On 2015-08-03 19:26, Lukasz Anaczkowski wrote:
> v2: Fixed ARM64 syntax error
>
> From the ACPI spec:
> "Logical processors with APIC ID values less than 0xFF
> (whether in XAPIC or X2APIC mode) must use the Processor LAPIC
> structure [...]. Logical processors with APIC ID values 0xFF and
> greater must use the Processor Local x2APIC structure."
>
> Because of above, BIOS is first enumerating cores with HT with
> LAPIC values (<0xFF) and then rest with X2APIC values (>=0xFF).
>
> With current kernel code, where enumeration is in order:
> BSP, X2APIC, LAPIC
> enumeration on machine with more than 255 CPUs (each core with 4 HT)
> first X2APIC IDs get low logical CPU IDs (1..x) and then LAPIC IDs
> get higher logical CPU IDs (50..y), as in example below:
>
> Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
> 0 0 0000 97 0001 145 0002 193 0003
> 1 50 0004 98 0005 146 0006 194 0007
> 2 51 0010 99 0011 147 0012 195 0013
> 3 52 0014 100 0015 148 0016 196 0017
> 4 53 0018 101 0019 149 001a 197 001b
> 5 54 001c 102 001d 150 001e 198 001f
> ...
> 62 95 00f8 143 00f9 191 00fa 239 00fb
> 63 37 00ff 96 00fc 144 00fd 192 00fe
> 64 1 0100 13 0101 25 0102 38 0103
> 65 2 0104 14 0105 26 0106 39 0107
> ...
>
> (Core - physical core, LCpu - logical CPU, ApicId - ID assigned
> by BIOS).
>
> This is wrong for the following reasons:
> () it's hard to predict how cores and threads will be enumerated
> () when it's hard to predict, s/w threads cannot be properly
> affinitized
> causing significant performance impact due to e.g. inproper cache
> sharing
> () enumeration is inconsistent with how threads are enumerated on
> other Intel Xeon processors
>
> To fix this, each LAPIC/X2APIC entry from MADT table needs to be
> handled at the same time when processing it, thus adding
> acpi_subtable_proc structure which stores
> () ACPI table id
> () handler that processes table
> () counter how many items has been processed
> and passing it to acpi_table_parse_entries().
>
> Also, order in which MADT LAPIC/X2APIC handlers are passed is
> reversed to achieve correct CPU enumeration.
>
> In scenario when someone boots kernel with options 'maxcpus=72
> nox2apic',
> in result less cores may be booted, since some of the CPUs the kernel
> will try to use will have APIC ID >= 0xFF. In such case, one
> should not pass 'nox2apic'.
>
> Disclimer: code parsing MADT LAPIC/X2APIC has not been touched since
> 2009,
> when X2APIC support was initially added. I do not know why MADT
> parsing
> code was added in the reversed order in the first place.
> I guess it didn't matter at that time since nobody cared about cores
> with APIC IDs >= 0xFF, right?
>
> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563,
> thus putting Yinghai Lu as 'Signed-off-by', as well.
>
> Signed-off-by: Yinghai Lu <[email protected]>
> Signed-off-by: Lukasz Anaczkowski <[email protected]>

It looked good, but I then made the mistake of actually trying it. Bad
idea:

[...]
RCU: Adjusting geometry for rcu_fanout_leaf=64, nr_cpu_ids=6
NR_IRQS:64 nr_irqs:64 0
No valid GICC entries exist
ACPI: Failed to initialize GIC IRQ controller
Kernel panic - not syncing: No interrupt controller found.
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-rc8+ #4623
Hardware name: ARM Juno development board (r1) (DT)
Call trace:
[<ffffffc000089960>] dump_backtrace+0x0/0x124
[<ffffffc000089a94>] show_stack+0x10/0x1c
[<ffffffc000658968>] dump_stack+0x84/0xc8
[<ffffffc0006577d8>] panic+0xe0/0x220
[<ffffffc0008cd2f4>] init_IRQ+0x20/0x2c
[<ffffffc0008cb850>] start_kernel+0x260/0x3b8
---[ end Kernel panic - not syncing: No interrupt controller found.

Reverting the patch makes the machine boot again. As I presume you
don't have access to such a box, I'll try to investigate it shortly.

Thanks,

M.
--
Fast, cheap, reliable. Pick two.

2015-08-26 11:42:55

by Lorenzo Pieralisi

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

Hi Lukasz,

On Wed, Aug 26, 2015 at 11:43:04AM +0100, Marc Zyngier wrote:
> Hi Lukasz,
>
> On 26/08/15 08:04, Anaczkowski, Lukasz wrote:
> > On Monday, August 3, 2015 8:26 PM
> > Lukasz Anaczkowski <[email protected]> wrote:
> >
> >> v2: Fixed ARM64 syntax error
> >
> > Hi Marc,
> >
> > Does this patch look ok now?

No, it does not; it seems to break arm64. I put together a fix
below. I do not think the way you handle the count increment
in acpi_parse_entries() is correct anyway, since you increment
it only if max_entries != 0, which changes mainline behaviour.

Thanks,
Lorenzo

-- >8 --

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index d98b866..ca5591d 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1101,7 +1101,7 @@ gic_v2_acpi_init(struct acpi_table_header *table)
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
table, &gic_proc, 1, 0);
- if (count <= 0) {
+ if (gic_proc.count <= 0) {
pr_err("No valid GICC entries exist\n");
return -EINVAL;
}
@@ -1117,7 +1117,7 @@ gic_v2_acpi_init(struct acpi_table_header *table)
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
table, &gic_proc, 1, 0);
- if (count <= 0) {
+ if (gic_proc.count <= 0) {
pr_err("No valid GICD entries exist\n");
return -EINVAL;
} else if (count > 1) {
--
2.2.1

2015-08-26 12:43:16

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

On 26/08/15 12:42, Lorenzo Pieralisi wrote:
> Hi Lukasz,
>
> On Wed, Aug 26, 2015 at 11:43:04AM +0100, Marc Zyngier wrote:
>> Hi Lukasz,
>>
>> On 26/08/15 08:04, Anaczkowski, Lukasz wrote:
>>> On Monday, August 3, 2015 8:26 PM
>>> Lukasz Anaczkowski <[email protected]> wrote:
>>>
>>>> v2: Fixed ARM64 syntax error
>>>
>>> Hi Marc,
>>>
>>> Does this patch look ok now?
>
> No it does not, it seems to break arm64, I put together a fix
> below. I do not think the way you handle the count increment
> in acpi_parse_entries() is correct anyway, since you increment
> it only if max_entries != 0, which changes mainline behaviour.

Yeah, this is fundamentally flawed:

- count is only incremented when max_entries != 0, as you noticed
- With max_entries != 0, count now represents the sum of all matches
Is that expected?
- The proc iteration stops after the first match. Why?
- The test for max_entries is done inside the proc loop. Why?

I came up with the following patch that restores arm64 to a booting state.

If the intention was to change the meaning of the acpi_parse_entries
return value, then this should be documented and agreed upon.

diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 1217e41..f06327f 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -249,19 +249,24 @@ acpi_parse_entries(char *id, unsigned long table_size,

while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
+ bool match = false;
+
+ if (max_entries && count >= max_entries)
+ break;
for (i = 0; i < proc_num; i++) {
if (entry->type != proc[i].id)
continue;
- if (max_entries && count++ >= max_entries)
- continue;
if (proc[i].handler(entry, table_end)) {
proc[i].count = -EINVAL;
return -EINVAL;
}
proc[i].count++;
- break;
+ match = true;
}

+ if (match)
+ count++;
+
/*
* If entry->length is 0, break from this loop to avoid
* infinite loop.

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2015-08-26 12:56:57

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT



On 03.08.2015 20:26, Lukasz Anaczkowski wrote:
> v2: Fixed ARM64 syntax error
>
> From the ACPI spec:
> "Logical processors with APIC ID values less than 0xFF
> (whether in XAPIC or X2APIC mode) must use the Processor LAPIC
> structure [...]. Logical processors with APIC ID values 0xFF and
> greater must use the Processor Local x2APIC structure."
>
> Because of above, BIOS is first enumerating cores with HT with
> LAPIC values (<0xFF) and then rest with X2APIC values (>=0xFF).
>
> With current kernel code, where enumeration is in order:
> BSP, X2APIC, LAPIC
> enumeration on machine with more than 255 CPUs (each core with 4 HT)
> first X2APIC IDs get low logical CPU IDs (1..x) and then LAPIC IDs
> get higher logical CPU IDs (50..y), as in example below:
>
> Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
> 0 0 0000 97 0001 145 0002 193 0003
> 1 50 0004 98 0005 146 0006 194 0007
> 2 51 0010 99 0011 147 0012 195 0013
> 3 52 0014 100 0015 148 0016 196 0017
> 4 53 0018 101 0019 149 001a 197 001b
> 5 54 001c 102 001d 150 001e 198 001f
> ...
> 62 95 00f8 143 00f9 191 00fa 239 00fb
> 63 37 00ff 96 00fc 144 00fd 192 00fe
> 64 1 0100 13 0101 25 0102 38 0103
> 65 2 0104 14 0105 26 0106 39 0107
> ...
>
> (Core - physical core, LCpu - logical CPU, ApicId - ID assigned
> by BIOS).
>
> This is wrong for the following reasons:
> () it's hard to predict how cores and threads will be enumerated
> () when it's hard to predict, s/w threads cannot be properly affinitized
> causing significant performance impact due to e.g. inproper cache
> sharing
> () enumeration is inconsistent with how threads are enumerated on
> other Intel Xeon processors
>
> To fix this, each LAPIC/X2APIC entry from MADT table needs to be
> handled at the same time when processing it, thus adding
> acpi_subtable_proc structure which stores
> () ACPI table id
> () handler that processes table
> () counter how many items has been processed
> and passing it to acpi_table_parse_entries().
>
> Also, order in which MADT LAPIC/X2APIC handlers are passed is
> reversed to achieve correct CPU enumeration.
>
> In scenario when someone boots kernel with options 'maxcpus=72 nox2apic',
> in result less cores may be booted, since some of the CPUs the kernel
> will try to use will have APIC ID >= 0xFF. In such case, one
> should not pass 'nox2apic'.
>
> Disclimer: code parsing MADT LAPIC/X2APIC has not been touched since 2009,
> when X2APIC support was initially added. I do not know why MADT parsing
> code was added in the reversed order in the first place.
> I guess it didn't matter at that time since nobody cared about cores
> with APIC IDs >= 0xFF, right?
>
> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563,
> thus putting Yinghai Lu as 'Signed-off-by', as well.
>
> Signed-off-by: Yinghai Lu <[email protected]>
> Signed-off-by: Lukasz Anaczkowski <[email protected]>
> ---
> arch/x86/kernel/acpi/boot.c | 29 +++++++++++++-----
> drivers/acpi/numa.c | 28 ++++++++++++-----
> drivers/acpi/tables.c | 75 ++++++++++++++++++++++++++++-----------------
> drivers/irqchip/irq-gic.c | 15 ++++++---
> include/linux/acpi.h | 13 ++++++--
> 5 files changed, 111 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
> index e49ee24..fb4a9d6 100644
> --- a/arch/x86/kernel/acpi/boot.c
> +++ b/arch/x86/kernel/acpi/boot.c
> @@ -981,6 +981,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
> {
> int count;
> int x2count = 0;
> + struct acpi_subtable_proc madt_proc[2];
>
> if (!cpu_has_apic)
> return -ENODEV;
> @@ -1004,10 +1005,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
> acpi_parse_sapic, MAX_LOCAL_APIC);
>
> if (!count) {
> - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
> - acpi_parse_x2apic, MAX_LOCAL_APIC);
> - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
> - acpi_parse_lapic, MAX_LOCAL_APIC);
> + memset(madt_proc, 0, sizeof(madt_proc));
> + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
> + madt_proc[0].handler = acpi_parse_lapic;
> + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
> + madt_proc[1].handler = acpi_parse_x2apic;
> + acpi_table_parse_entries_array(ACPI_SIG_MADT,
> + sizeof(struct acpi_table_madt),
> + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
> + count = madt_proc[0].count;
> + x2count = madt_proc[1].count;
> }
> if (!count && !x2count) {
> printk(KERN_ERR PREFIX "No LAPIC entries present\n");
> @@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
> return count;
> }
>
> - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
> - acpi_parse_x2apic_nmi, 0);
> - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
> - acpi_parse_lapic_nmi, 0);
> + memset(madt_proc, 0, sizeof(madt_proc));
> + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
> + madt_proc[0].handler = acpi_parse_x2apic_nmi;
> + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
> + madt_proc[1].handler = acpi_parse_lapic_nmi;
> + acpi_table_parse_entries_array(ACPI_SIG_MADT,
> + sizeof(struct acpi_table_madt),
> + madt_proc, ARRAY_SIZE(madt_proc), 0);
> + count = madt_proc[0].count;
> + x2count = madt_proc[1].count;
> if (count < 0 || x2count < 0) {
> printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
> /* TBD: Cleanup to allow fallback to MPS */
> diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
> index acaa3b4..a000195 100644
> --- a/drivers/acpi/numa.c
> +++ b/drivers/acpi/numa.c
> @@ -314,9 +314,15 @@ static int __init
> acpi_table_parse_srat(enum acpi_srat_type id,
> acpi_tbl_entry_handler handler, unsigned int max_entries)
> {
> - return acpi_table_parse_entries(ACPI_SIG_SRAT,
> - sizeof(struct acpi_table_srat), id,
> - handler, max_entries);
> + struct acpi_subtable_proc srat_proc;
> +
> + memset(&srat_proc, 0, sizeof(srat_proc));
> + srat_proc.id = id;
> + srat_proc.handler = handler;
> +
> + return acpi_table_parse_entries_array(ACPI_SIG_SRAT,
> + sizeof(struct acpi_table_srat),
> + &srat_proc, 1, max_entries);
> }
>
> int __init acpi_numa_init(void)
> @@ -331,10 +337,18 @@ int __init acpi_numa_init(void)
>
> /* SRAT: Static Resource Affinity Table */
> if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
> - acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
> - acpi_parse_x2apic_affinity, 0);
> - acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
> - acpi_parse_processor_affinity, 0);
> + struct acpi_subtable_proc srat_proc[2];
> +
> + memset(srat_proc, 0, sizeof(srat_proc));
> + srat_proc[0].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
> + srat_proc[0].handler = acpi_parse_x2apic_affinity;
> + srat_proc[1].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
> + srat_proc[1].handler = acpi_parse_processor_affinity;
> +
> + acpi_table_parse_entries_array(ACPI_SIG_SRAT,
> + sizeof(struct acpi_table_srat),
> + srat_proc, ARRAY_SIZE(srat_proc), 0);
> +
> cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
> acpi_parse_memory_affinity,
> NR_NODE_MEMBLKS);
> diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
> index 2e19189..1217e41 100644
> --- a/drivers/acpi/tables.c
> +++ b/drivers/acpi/tables.c
> @@ -216,25 +216,27 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
>
> int __init
> acpi_parse_entries(char *id, unsigned long table_size,
> - acpi_tbl_entry_handler handler,
> struct acpi_table_header *table_header,
> - int entry_id, unsigned int max_entries)
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries)
> {
> struct acpi_subtable_header *entry;
> int count = 0;
> unsigned long table_end;
> + int i;
>
> - if (acpi_disabled)
> + if (acpi_disabled) {
> + proc[0].count = -ENODEV;
> return -ENODEV;
> -
> - if (!id || !handler)
> - return -EINVAL;
> -
> - if (!table_size)
> + }
> + if (!table_size) {
> + proc[0].count = -EINVAL;
> return -EINVAL;
> + }
>
> if (!table_header) {
> pr_warn("%4.4s not present\n", id);
> + proc[0].count = -ENODEV;
> return -ENODEV;
> }
>
> @@ -247,12 +249,17 @@ acpi_parse_entries(char *id, unsigned long table_size,
>
> while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
> table_end) {
> - if (entry->type == entry_id
> - && (!max_entries || count < max_entries)) {
> - if (handler(entry, table_end))
> + for (i = 0; i < proc_num; i++) {
> + if (entry->type != proc[i].id)
> + continue;
> + if (max_entries && count++ >= max_entries)
> + continue;
> + if (proc[i].handler(entry, table_end)) {
> + proc[i].count = -EINVAL;
> return -EINVAL;
> -
> - count++;
> + }
> + proc[i].count++;
> + break;
> }
>
> /*
> @@ -260,7 +267,11 @@ acpi_parse_entries(char *id, unsigned long table_size,
> * infinite loop.
> */
> if (entry->length == 0) {
> - pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
> + pr_err("[%4.4s:0x%02x ", id, proc[0].id);
> + for (i = 1; i < proc_num; i++)
> + pr_cont(" 0x%02x", proc[i].id);
> + pr_cont("] Invalid zero length\n");
> + proc[0].count = -EINVAL;
> return -EINVAL;
> }
>
> @@ -269,18 +280,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
> }
>
> if (max_entries && count > max_entries) {
> - pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
> - id, entry_id, count - max_entries, count);
> + pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
> + for (i = 1; i < proc_num; i++)
> + pr_cont(" 0x%02x", proc[i].id);
> + pr_cont("] ignored %i entries of %i found\n",
> + count-max_entries, count);
> }
>
> return count;
> }
>
> int __init
> -acpi_table_parse_entries(char *id,
> +acpi_table_parse_entries_array(char *id,
> unsigned long table_size,
> - int entry_id,
> - acpi_tbl_entry_handler handler,
> + struct acpi_subtable_proc *proc, int proc_num,
> unsigned int max_entries)
> {
> struct acpi_table_header *table_header = NULL;
> @@ -288,11 +301,10 @@ acpi_table_parse_entries(char *id,
> int count;
> u32 instance = 0;
>
> - if (acpi_disabled)
> + if (acpi_disabled) {
> + proc[0].count = -ENODEV;
> return -ENODEV;
> -
> - if (!id || !handler)
> - return -EINVAL;
> + }
>
> if (!strncmp(id, ACPI_SIG_MADT, 4))
> instance = acpi_apic_instance;
> @@ -300,11 +312,12 @@ acpi_table_parse_entries(char *id,
> acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
> if (!table_header) {
> pr_warn("%4.4s not present\n", id);
> + proc[0].count = -ENODEV;
> return -ENODEV;
> }
>
> - count = acpi_parse_entries(id, table_size, handler, table_header,
> - entry_id, max_entries);
> + count = acpi_parse_entries(id, table_size, table_header,
> + proc, proc_num, max_entries);
>
> early_acpi_os_unmap_memory((char *)table_header, tbl_size);
> return count;
> @@ -314,9 +327,15 @@ int __init
> acpi_table_parse_madt(enum acpi_madt_type id,
> acpi_tbl_entry_handler handler, unsigned int max_entries)
> {
> - return acpi_table_parse_entries(ACPI_SIG_MADT,
> - sizeof(struct acpi_table_madt), id,
> - handler, max_entries);
> + struct acpi_subtable_proc madt_proc;
> +
> + memset(&madt_proc, 0, sizeof(madt_proc));
> + madt_proc.id = id;
> + madt_proc.handler = handler;
> +
> + return acpi_table_parse_entries_array(ACPI_SIG_MADT,
> + sizeof(struct acpi_table_madt),
> + &madt_proc, 1, max_entries);
> }
>
> /**
> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index 4dd8826..d98b866 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
> {
> void __iomem *cpu_base, *dist_base;
> int count;
> + struct acpi_subtable_proc gic_proc;
> +
> + memset(&gic_proc, 0, sizeof(gic_proc));
> + gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
> + gic_proc.handler = gic_acpi_parse_madt_cpu;
>
> /* Collect CPU base addresses */
> count = acpi_parse_entries(ACPI_SIG_MADT,
> sizeof(struct acpi_table_madt),
> - gic_acpi_parse_madt_cpu, table,
> - ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
> + table, &gic_proc, 1, 0);
> if (count <= 0) {
> pr_err("No valid GICC entries exist\n");
> return -EINVAL;
> @@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
> * Find distributor base address. We expect one distributor entry since
> * ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
> */
> + memset(&gic_proc, 0, sizeof(gic_proc));
> + gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
> + gic_proc.handler = gic_acpi_parse_madt_distributor;
> +
> count = acpi_parse_entries(ACPI_SIG_MADT,
> sizeof(struct acpi_table_madt),
> - gic_acpi_parse_madt_distributor, table,
> - ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
> + table, &gic_proc, 1, 0);
> if (count <= 0) {
> pr_err("No valid GICD entries exist\n");
> return -EINVAL;
> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
> index d2445fa..59b17e8 100644
> --- a/include/linux/acpi.h
> +++ b/include/linux/acpi.h
> @@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
> (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
> ((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
>
> +struct acpi_subtable_proc {
> + int id;
> + acpi_tbl_entry_handler handler;
> + int count;
> +};
> +
> char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
> void __acpi_unmap_table(char *map, unsigned long size);
> int early_acpi_boot_init(void);
> @@ -145,10 +151,13 @@ int acpi_numa_init (void);
>
> int acpi_table_init (void);
> int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
> +int acpi_table_parse_entries_array(char *id, unsigned long table_size,
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries);
> int __init acpi_parse_entries(char *id, unsigned long table_size,
> - acpi_tbl_entry_handler handler,
> struct acpi_table_header *table_header,
> - int entry_id, unsigned int max_entries);
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries);
> int __init acpi_table_parse_entries(char *id, unsigned long table_size,
> int entry_id,
> acpi_tbl_entry_handler handler,
>
You renamed acpi_table_parse_entries -> acpi_table_parse_entries_array;
is the acpi_table_parse_entries declaration above supposed to be removed?

Tomasz

2015-08-26 17:49:37

by Lukasz Anaczkowski

[permalink] [raw]
Subject: Re: [PATCH] x86, acpi: Handle lapic/x2apic entries in MADT

Marc and Lorenzo,

First of all, apologies for breaking arm64 (again) and thank you for
the debugging effort. I owe you.

> - count is only incremented when max_entries != 0, as you noticed

You are right, sorry for that, it's fixed in v3.

> - With max_entries != 0, count now represent the sum of all matches
> Is that expected?

I have no strong opinion on that one. All of the x86 ACPI entries
handling only checks for count < 0, or uses count from the
acpi_subtable_proc structure (and that's why I didn't notice the
mainline breakage).
If you think it's not correct or less usable than another approach,
let me know.

> - The proc iteration stops after the first match. Why?

So, the initial implementation of acpi_parse_entries accepted a
single handler for the ACPI table. Now, with this change, the assumption
is that different handlers for different tables/subtables are passed,
meaning only one can meet the entry->type == proc[i].id condition.
This approach saves one local variable, but
I don't think this is the ultimate argument :)

> - The test for max_entries is done inside the proc loop. Why?

That's obviously wrong in context of the overall wrong counting.

> [...] this should be documented and agreed upon.

I've added description with assumptions. Again, if you think it's
not correct, let me know.

Tomasz Nowicki wrote:
> should acpi_table_parse_entries suppose to be removed above?

Thanks for pointing this out. I missed the implementation of
acpi_table_parse_entries when I was backporting the initial patch.
I've added it back.

Cheers,
Lukasz

2015-08-26 17:49:42

by Lukasz Anaczkowski

[permalink] [raw]
Subject: [PATCH] x86, arm64, acpi: Handle lapic/x2apic entries in MADT

v3:
() Fixed entries counting
() Added missing acpi_table_parse_entries definition
() acpi_parse_entries() now returns sum of all matching
entries

v2: Fixed ARM64 syntax error

From the ACPI spec:
"Logical processors with APIC ID values less than 0xFF
(whether in XAPIC or X2APIC mode) must use the Processor LAPIC
structure [...]. Logical processors with APIC ID values 0xFF and
greater must use the Processor Local x2APIC structure."

Because of the above, BIOS first enumerates cores with HT with
LAPIC values (<0xFF) and then the rest with X2APIC values (>=0xFF).

With the current kernel code, where enumeration is in the order:
BSP, X2APIC, LAPIC
on a machine with more than 255 CPUs (each core with 4 HT),
X2APIC IDs first get low logical CPU IDs (1..x) and then LAPIC IDs
get higher logical CPU IDs (50..y), as in the example below:

Core LCpu ApicId LCpu ApicId LCpu ApicId LCpu ApicId
0 0 0000 97 0001 145 0002 193 0003
1 50 0004 98 0005 146 0006 194 0007
2 51 0010 99 0011 147 0012 195 0013
3 52 0014 100 0015 148 0016 196 0017
4 53 0018 101 0019 149 001a 197 001b
5 54 001c 102 001d 150 001e 198 001f
...
62 95 00f8 143 00f9 191 00fa 239 00fb
63 37 00ff 96 00fc 144 00fd 192 00fe
64 1 0100 13 0101 25 0102 38 0103
65 2 0104 14 0105 26 0106 39 0107
...

(Core - physical core, LCpu - logical CPU, ApicId - ID assigned
by BIOS).

This is wrong for the following reasons:
() it's hard to predict how cores and threads will be enumerated
() when it's hard to predict, s/w threads cannot be properly affinitized
causing significant performance impact due to e.g. improper cache
sharing
() enumeration is inconsistent with how threads are enumerated on
other Intel Xeon processors

To fix this, each LAPIC/X2APIC entry from MADT table needs to be
handled at the same time when processing it, thus adding
acpi_subtable_proc structure which stores
() ACPI table id
() handler that processes table
() counter of how many items have been processed
and passing it to acpi_table_parse_entries().

Also, order in which MADT LAPIC/X2APIC handlers are passed is
reversed to achieve correct CPU enumeration.

In a scenario where someone boots the kernel with options
'maxcpus=72 nox2apic', fewer cores may be booted as a result, since some
of the CPUs the kernel will try to use will have APIC ID >= 0xFF. In such
a case, one should not pass 'nox2apic'.

Disclaimer: the code parsing MADT LAPIC/X2APIC has not been touched since
2009, when X2APIC support was initially added. I do not know why the MADT
parsing code was added in the reversed order in the first place.
I guess it didn't matter at that time since nobody cared about cores
with APIC IDs >= 0xFF, right?

This patch is based on work of "Yinghai Lu <[email protected]>"
previously published at https://lkml.org/lkml/2013/1/21/563,
thus putting Yinghai Lu as 'Signed-off-by', as well.

Signed-off-by: Yinghai Lu <[email protected]>
Signed-off-by: Lukasz Anaczkowski <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
---
arch/x86/kernel/acpi/boot.c | 29 ++++++++----
drivers/acpi/numa.c | 28 ++++++++---
drivers/acpi/tables.c | 113 +++++++++++++++++++++++++++++++++-----------
drivers/irqchip/irq-gic.c | 15 ++++--
include/linux/acpi.h | 13 ++++-
5 files changed, 149 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e49ee24..fb4a9d6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -981,6 +981,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
int x2count = 0;
+ struct acpi_subtable_proc madt_proc[2];

if (!cpu_has_apic)
return -ENODEV;
@@ -1004,10 +1005,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);

if (!count) {
- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_LOCAL_APIC);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_LOCAL_APIC);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+ madt_proc[0].handler = acpi_parse_lapic;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+ madt_proc[1].handler = acpi_parse_x2apic;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1019,10 +1026,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}

- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
- acpi_parse_lapic_nmi, 0);
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_X2APIC_NMI;
+ madt_proc[0].handler = acpi_parse_x2apic_nmi;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_APIC_NMI;
+ madt_proc[1].handler = acpi_parse_lapic_nmi;
+ acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), 0);
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index acaa3b4..a000195 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -314,9 +314,15 @@ static int __init
acpi_table_parse_srat(enum acpi_srat_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_SRAT,
- sizeof(struct acpi_table_srat), id,
- handler, max_entries);
+ struct acpi_subtable_proc srat_proc;
+
+ memset(&srat_proc, 0, sizeof(srat_proc));
+ srat_proc.id = id;
+ srat_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ &srat_proc, 1, max_entries);
}

int __init acpi_numa_init(void)
@@ -331,10 +337,18 @@ int __init acpi_numa_init(void)

/* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
- acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
- acpi_parse_x2apic_affinity, 0);
- acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
- acpi_parse_processor_affinity, 0);
+ struct acpi_subtable_proc srat_proc[2];
+
+ memset(srat_proc, 0, sizeof(srat_proc));
+ srat_proc[0].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+ srat_proc[0].handler = acpi_parse_x2apic_affinity;
+ srat_proc[1].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+ srat_proc[1].handler = acpi_parse_processor_affinity;
+
+ acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ srat_proc, ARRAY_SIZE(srat_proc), 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2e19189..d5c9a1b 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -214,27 +214,45 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
}
}

+/**
+ * acpi_table_parse - for each proc_num find a subtable with proc->id
+ * and run proc->handler on it. Assumption is that there's only
+ * single handler for particular id.
+ *
+ * @id: table id (for debugging purposes)
+ * @table_size: single entry size
+ * @table_header: where does the table start?
+ * @proc: array of acpi_subtable_proc struct containing subtable id
+ * and associated handler with it
+ * @proc_num: how big proc is?
+ * @max_entries: how many entries can we process?
+ *
+ * On success returns sum of all matching entries for all proc handlers.
+ * Oterwise, -ENODEV or -EINVAL is returned.
+ */
int __init
acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries)
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries)
{
struct acpi_subtable_header *entry;
int count = 0;
unsigned long table_end;
+ int i;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
-
- if (!table_size)
+ }
+ if (!table_size) {
+ proc[0].count = -EINVAL;
return -EINVAL;
+ }

if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

@@ -247,20 +265,31 @@ acpi_parse_entries(char *id, unsigned long table_size,

while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
- if (entry->type == entry_id
- && (!max_entries || count < max_entries)) {
- if (handler(entry, table_end))
+ if (max_entries && count >= max_entries)
+ continue;
+ for (i = 0; i < proc_num; i++) {
+ if (entry->type != proc[i].id)
+ continue;
+ if (proc[i].handler(entry, table_end)) {
+ proc[i].count = -EINVAL;
return -EINVAL;
-
- count++;
+ }
+ proc[i].count++;
+ break;
}
+ if (i != proc_num)
+ count++;

/*
* If entry->length is 0, break from this loop to avoid
* infinite loop.
*/
if (entry->length == 0) {
- pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
+ pr_err("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] Invalid zero length\n");
+ proc[0].count = -EINVAL;
return -EINVAL;
}

@@ -269,18 +298,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
}

if (max_entries && count > max_entries) {
- pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
- id, entry_id, count - max_entries, count);
+ pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
+ for (i = 1; i < proc_num; i++)
+ pr_cont(" 0x%02x", proc[i].id);
+ pr_cont("] ignored %i entries of %i found\n",
+ count-max_entries, count);
}

return count;
}

int __init
-acpi_table_parse_entries(char *id,
+acpi_table_parse_entries_array(char *id,
unsigned long table_size,
- int entry_id,
- acpi_tbl_entry_handler handler,
+ struct acpi_subtable_proc *proc, int proc_num,
unsigned int max_entries)
{
struct acpi_table_header *table_header = NULL;
@@ -288,11 +319,10 @@ acpi_table_parse_entries(char *id,
int count;
u32 instance = 0;

- if (acpi_disabled)
+ if (acpi_disabled) {
+ proc[0].count = -ENODEV;
return -ENODEV;
-
- if (!id || !handler)
- return -EINVAL;
+ }

if (!strncmp(id, ACPI_SIG_MADT, 4))
instance = acpi_apic_instance;
@@ -300,23 +330,50 @@ acpi_table_parse_entries(char *id,
acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
if (!table_header) {
pr_warn("%4.4s not present\n", id);
+ proc[0].count = -ENODEV;
return -ENODEV;
}

- count = acpi_parse_entries(id, table_size, handler, table_header,
- entry_id, max_entries);
+ count = acpi_parse_entries(id, table_size, table_header,
+ proc, proc_num, max_entries);

early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return count;
}

int __init
+acpi_table_parse_entries(char *id,
+ unsigned long table_size,
+ int entry_id,
+ acpi_tbl_entry_handler handler,
+ unsigned int max_entries)
+{
+ struct acpi_subtable_proc proc[1];
+
+ if (!handler)
+ return -EINVAL;
+
+ memset(proc, 0, sizeof(proc));
+ proc[0].id = entry_id;
+ proc[0].handler = handler;
+
+ return acpi_table_parse_entries_array(id, table_size, proc, 1,
+ max_entries);
+}
+
+int __init
acpi_table_parse_madt(enum acpi_madt_type id,
acpi_tbl_entry_handler handler, unsigned int max_entries)
{
- return acpi_table_parse_entries(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
+ struct acpi_subtable_proc madt_proc;
+
+ memset(&madt_proc, 0, sizeof(madt_proc));
+ madt_proc.id = id;
+ madt_proc.handler = handler;
+
+ return acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ &madt_proc, 1, max_entries);
}

/**
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4dd8826..d98b866 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
{
void __iomem *cpu_base, *dist_base;
int count;
+ struct acpi_subtable_proc gic_proc;
+
+ memset(&gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
+ gic_proc.handler = gic_acpi_parse_madt_cpu;

/* Collect CPU base addresses */
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_cpu, table,
- ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+ table, &gic_proc, 1, 0);
if (count <= 0) {
pr_err("No valid GICC entries exist\n");
return -EINVAL;
@@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
* Find distributor base address. We expect one distributor entry since
* ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
*/
+ memset(&gic_proc, 0, sizeof(gic_proc));
+ gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
+ gic_proc.handler = gic_acpi_parse_madt_distributor;
+
count = acpi_parse_entries(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
- gic_acpi_parse_madt_distributor, table,
- ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+ table, &gic_proc, 1, 0);
if (count <= 0) {
pr_err("No valid GICD entries exist\n");
return -EINVAL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d2445fa..59b17e8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))

+struct acpi_subtable_proc {
+ int id;
+ acpi_tbl_entry_handler handler;
+ int count;
+};
+
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
void __acpi_unmap_table(char *map, unsigned long size);
int early_acpi_boot_init(void);
@@ -145,10 +151,13 @@ int acpi_numa_init (void);

int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
+int acpi_table_parse_entries_array(char *id, unsigned long table_size,
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_parse_entries(char *id, unsigned long table_size,
- acpi_tbl_entry_handler handler,
struct acpi_table_header *table_header,
- int entry_id, unsigned int max_entries);
+ struct acpi_subtable_proc *proc, int proc_num,
+ unsigned int max_entries);
int __init acpi_table_parse_entries(char *id, unsigned long table_size,
int entry_id,
acpi_tbl_entry_handler handler,
--
1.8.3.1

2015-08-27 09:37:42

by Lorenzo Pieralisi

[permalink] [raw]
Subject: Re: [PATCH] x86, arm64, acpi: Handle lapic/x2apic entries in MADT

On Wed, Aug 26, 2015 at 06:49:29PM +0100, Lukasz Anaczkowski wrote:

[...]

> diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
> index 2e19189..d5c9a1b 100644
> --- a/drivers/acpi/tables.c
> +++ b/drivers/acpi/tables.c
> @@ -214,27 +214,45 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
> }
> }
>
> +/**
> + * acpi_table_parse - for each proc_num find a subtable with proc->id

Name of function does not correspond to the function you are actually
commenting.

> + * and run proc->handler on it. Assumption is that there's only
> + * single handler for particular id.
> + *
> + * @id: table id (for debugging purposes)
> + * @table_size: single entry size
> + * @table_header: where does the table start?
> + * @proc: array of acpi_subtable_proc struct containing subtable id
> + * and associated handler with it
> + * @proc_num: how big proc is?
> + * @max_entries: how many entries can we process?
> + *
> + * On success returns sum of all matching entries for all proc handlers.
> + * Oterwise, -ENODEV or -EINVAL is returned.

s/Oterwise/Otherwise

> + */
> int __init
> acpi_parse_entries(char *id, unsigned long table_size,
> - acpi_tbl_entry_handler handler,
> struct acpi_table_header *table_header,
> - int entry_id, unsigned int max_entries)
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries)
> {
> struct acpi_subtable_header *entry;
> int count = 0;
> unsigned long table_end;
> + int i;
>
> - if (acpi_disabled)
> + if (acpi_disabled) {
> + proc[0].count = -ENODEV;
> return -ENODEV;
> -
> - if (!id || !handler)
> - return -EINVAL;
> -
> - if (!table_size)
> + }

Add a space please

> + if (!table_size) {
> + proc[0].count = -EINVAL;

This is misleading. Why do we want to return error only in the first entry ?
If I get the function logic right, if this function returns an error
somehow the parsing failed, so the proc array content must be considered
invalid. Ergo, just returning an error code should be enough and
that's what you should check first in eg acpi_parse_madt_lapic_entries().

Comment is valid for the whole patch.

> return -EINVAL;
> + }
>
> if (!table_header) {
> pr_warn("%4.4s not present\n", id);
> + proc[0].count = -ENODEV;
> return -ENODEV;
> }
>
> @@ -247,20 +265,31 @@ acpi_parse_entries(char *id, unsigned long table_size,
>
> while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
> table_end) {
> - if (entry->type == entry_id
> - && (!max_entries || count < max_entries)) {
> - if (handler(entry, table_end))
> + if (max_entries && count >= max_entries)
> + continue;
> + for (i = 0; i < proc_num; i++) {
> + if (entry->type != proc[i].id)
> + continue;
> + if (proc[i].handler(entry, table_end)) {
> + proc[i].count = -EINVAL;
> return -EINVAL;
> -
> - count++;
> + }
> + proc[i].count++;
> + break;
> }
> + if (i != proc_num)
> + count++;
>
> /*
> * If entry->length is 0, break from this loop to avoid
> * infinite loop.
> */
> if (entry->length == 0) {
> - pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id);
> + pr_err("[%4.4s:0x%02x ", id, proc[0].id);
> + for (i = 1; i < proc_num; i++)
> + pr_cont(" 0x%02x", proc[i].id);
> + pr_cont("] Invalid zero length\n");
> + proc[0].count = -EINVAL;
> return -EINVAL;
> }
>
> @@ -269,18 +298,20 @@ acpi_parse_entries(char *id, unsigned long table_size,
> }
>
> if (max_entries && count > max_entries) {
> - pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
> - id, entry_id, count - max_entries, count);
> + pr_warn("[%4.4s:0x%02x ", id, proc[0].id);
> + for (i = 1; i < proc_num; i++)
> + pr_cont(" 0x%02x", proc[i].id);
> + pr_cont("] ignored %i entries of %i found\n",
> + count-max_entries, count);
> }
>
> return count;
> }
>
> int __init
> -acpi_table_parse_entries(char *id,
> +acpi_table_parse_entries_array(char *id,
> unsigned long table_size,
> - int entry_id,
> - acpi_tbl_entry_handler handler,
> + struct acpi_subtable_proc *proc, int proc_num,
> unsigned int max_entries)
> {
> struct acpi_table_header *table_header = NULL;
> @@ -288,11 +319,10 @@ acpi_table_parse_entries(char *id,
> int count;
> u32 instance = 0;
>
> - if (acpi_disabled)
> + if (acpi_disabled) {
> + proc[0].count = -ENODEV;
> return -ENODEV;
> -
> - if (!id || !handler)
> - return -EINVAL;
> + }
>
> if (!strncmp(id, ACPI_SIG_MADT, 4))
> instance = acpi_apic_instance;
> @@ -300,23 +330,50 @@ acpi_table_parse_entries(char *id,
> acpi_get_table_with_size(id, instance, &table_header, &tbl_size);
> if (!table_header) {
> pr_warn("%4.4s not present\n", id);
> + proc[0].count = -ENODEV;
> return -ENODEV;
> }
>
> - count = acpi_parse_entries(id, table_size, handler, table_header,
> - entry_id, max_entries);
> + count = acpi_parse_entries(id, table_size, table_header,
> + proc, proc_num, max_entries);
>
> early_acpi_os_unmap_memory((char *)table_header, tbl_size);
> return count;
> }
>
> int __init
> +acpi_table_parse_entries(char *id,
> + unsigned long table_size,
> + int entry_id,
> + acpi_tbl_entry_handler handler,
> + unsigned int max_entries)
> +{
> + struct acpi_subtable_proc proc[1];
> +
> + if (!handler)
> + return -EINVAL;
> +
> + memset(proc, 0, sizeof(proc));
> + proc[0].id = entry_id;
> + proc[0].handler = handler;
> +
> + return acpi_table_parse_entries_array(id, table_size, proc, 1,
> + max_entries);
> +}
> +
> +int __init
> acpi_table_parse_madt(enum acpi_madt_type id,
> acpi_tbl_entry_handler handler, unsigned int max_entries)
> {
> - return acpi_table_parse_entries(ACPI_SIG_MADT,
> - sizeof(struct acpi_table_madt), id,
> - handler, max_entries);

Why can't you leave this call as-is ?

> + struct acpi_subtable_proc madt_proc;

Is there a reason why you use a struct here and a size 1 array in
acpi_table_parse_entries() ? Keep them consistent.

Actually, I do not think you need to patch acpi_table_parse_madt() at
all unless I am missing something.

Lorenzo

> +
> + memset(&madt_proc, 0, sizeof(madt_proc));
> + madt_proc.id = id;
> + madt_proc.handler = handler;
> +
> + return acpi_table_parse_entries_array(ACPI_SIG_MADT,
> + sizeof(struct acpi_table_madt),
> + &madt_proc, 1, max_entries);
> }
>
> /**
> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index 4dd8826..d98b866 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -1091,12 +1091,16 @@ gic_v2_acpi_init(struct acpi_table_header *table)
> {
> void __iomem *cpu_base, *dist_base;
> int count;
> + struct acpi_subtable_proc gic_proc;
> +
> + memset(&gic_proc, 0, sizeof(gic_proc));
> + gic_proc.id = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
> + gic_proc.handler = gic_acpi_parse_madt_cpu;
>
> /* Collect CPU base addresses */
> count = acpi_parse_entries(ACPI_SIG_MADT,
> sizeof(struct acpi_table_madt),
> - gic_acpi_parse_madt_cpu, table,
> - ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
> + table, &gic_proc, 1, 0);
> if (count <= 0) {
> pr_err("No valid GICC entries exist\n");
> return -EINVAL;
> @@ -1106,10 +1110,13 @@ gic_v2_acpi_init(struct acpi_table_header *table)
> * Find distributor base address. We expect one distributor entry since
> * ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
> */
> + memset(&gic_proc, 0, sizeof(gic_proc));
> + gic_proc.id = ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR;
> + gic_proc.handler = gic_acpi_parse_madt_distributor;
> +
> count = acpi_parse_entries(ACPI_SIG_MADT,
> sizeof(struct acpi_table_madt),
> - gic_acpi_parse_madt_distributor, table,
> - ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
> + table, &gic_proc, 1, 0);
> if (count <= 0) {
> pr_err("No valid GICD entries exist\n");
> return -EINVAL;
> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
> index d2445fa..59b17e8 100644
> --- a/include/linux/acpi.h
> +++ b/include/linux/acpi.h
> @@ -135,6 +135,12 @@ static inline void acpi_initrd_override(void *data, size_t size)
> (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
> ((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
>
> +struct acpi_subtable_proc {
> + int id;
> + acpi_tbl_entry_handler handler;
> + int count;
> +};
> +
> char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
> void __acpi_unmap_table(char *map, unsigned long size);
> int early_acpi_boot_init(void);
> @@ -145,10 +151,13 @@ int acpi_numa_init (void);
>
> int acpi_table_init (void);
> int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
> +int acpi_table_parse_entries_array(char *id, unsigned long table_size,
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries);
> int __init acpi_parse_entries(char *id, unsigned long table_size,
> - acpi_tbl_entry_handler handler,
> struct acpi_table_header *table_header,
> - int entry_id, unsigned int max_entries);
> + struct acpi_subtable_proc *proc, int proc_num,
> + unsigned int max_entries);
> int __init acpi_table_parse_entries(char *id, unsigned long table_size,
> int entry_id,
> acpi_tbl_entry_handler handler,
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2015-08-28 08:30:43

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] x86, arm64, acpi: Handle lapic/x2apic entries in MADT


* Lukasz Anaczkowski <[email protected]> wrote:

> This patch is based on work of "Yinghai Lu <[email protected]>"
> previously published at https://lkml.org/lkml/2013/1/21/563,
> thus putting Yinghai Lu as 'Signed-off-by', as well.
>
> Signed-off-by: Yinghai Lu <[email protected]>
> Signed-off-by: Lukasz Anaczkowski <[email protected]>
> Acked-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/kernel/acpi/boot.c | 29 ++++++++----
> drivers/acpi/numa.c | 28 ++++++++---
> drivers/acpi/tables.c | 113 +++++++++++++++++++++++++++++++++-----------
> drivers/irqchip/irq-gic.c | 15 ++++--
> include/linux/acpi.h | 13 ++++-
> 5 files changed, 149 insertions(+), 49 deletions(-)

Yeah, so this patch is way too large, it should be split into 3-4 parts that first
introduce separate changes (such as renames), then add any extra functions that
might be needed, and finally do the minimal changes that actually change
behavior.

Thanks,

Ingo