2019-04-24 11:07:42

by Baoquan He

[permalink] [raw]
Subject: [PATCH v5 0/2] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel

In this series, patch 2/2 has dependency on patch 1/1, otherwise
it may cause system to reset to firmware on some machines.

The patch 2/2 is the version Boris organized:
http://lkml.kernel.org/r/[email protected]

Patch 1/1 is based on Kairui's v1 patch, and add ACPI tables mapping:
http://lkml.kernel.org/r/[email protected]

Dave Young confirmed this patchset passed test on his t420 laptop, where
the system resetting issue caused by patch 2/2 was found.

Junichi Nomura (1):
x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels

Kairui Song (1):
x86/kexec: Build identity mapping for EFI systab and ACPI tables

arch/x86/boot/compressed/acpi.c | 143 +++++++++++++++++++++--------
arch/x86/kernel/machine_kexec_64.c | 86 +++++++++++++++++
2 files changed, 193 insertions(+), 36 deletions(-)

--
2.17.2


2019-04-24 09:35:25

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels

On 04/24/19 at 05:29pm, Baoquan He wrote:
> From: Junichi Nomura <[email protected]>
>
> Commit
>
> 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
>
> broke kexec boot on EFI systems. efi_get_rsdp_addr() in the early
> parsing code tries to search RSDP from the EFI tables but that will
> crash because the table address is virtual when the kernel was booted by
> kexec (set_virtual_address_map() has run in the first kernel and cannot
> be run again in the second kernel).
>
> In the case of kexec, the physical address of EFI tables is provided via
> efi_setup_data in boot_params, which is set up by kexec(1).
>
> Factor out the table parsing code and use different pointers depending
> on whether the kernel is booted by kexec or not.
>
> [ bp: Massage. ]
>
> Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
> Signed-off-by: Jun'ichi Nomura <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: Chao Fan <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Cc: Dave Young <[email protected]>
> Link: https://lkml.kernel.org/r/[email protected]
> Signed-off-by: Baoquan He <[email protected]>

Oops, forgot removing this line of Signed-off-by, it's auto generated by
git format-patch.

> ---
> arch/x86/boot/compressed/acpi.c | 143 ++++++++++++++++++++++++--------
> 1 file changed, 107 insertions(+), 36 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
> index 0ef4ad55b29b..8cecce1ac0cd 100644
> --- a/arch/x86/boot/compressed/acpi.c
> +++ b/arch/x86/boot/compressed/acpi.c
> @@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
> return addr;
> }
>
> -/* Search EFI system tables for RSDP. */
> -static acpi_physical_address efi_get_rsdp_addr(void)
> +/*
> + * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
> + * ACPI_TABLE_GUID are found, take the former, which has more features.
> + */
> +static acpi_physical_address
> +__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
> + bool efi_64)
> {
> acpi_physical_address rsdp_addr = 0;
>
> #ifdef CONFIG_EFI
> - unsigned long systab, systab_tables, config_tables;
> + int i;
> +
> + /* Get EFI tables from systab. */
> + for (i = 0; i < nr_tables; i++) {
> + acpi_physical_address table;
> + efi_guid_t guid;
> +
> + if (efi_64) {
> + efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
> +
> + guid = tbl->guid;
> + table = tbl->table;
> +
> + if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
> + debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
> + return 0;
> + }
> + } else {
> + efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
> +
> + guid = tbl->guid;
> + table = tbl->table;
> + }
> +
> + if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
> + rsdp_addr = table;
> + else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
> + return table;
> + }
> +#endif
> + return rsdp_addr;
> +}
> +
> +/* EFI/kexec support is 64-bit only. */
> +#ifdef CONFIG_X86_64
> +static struct efi_setup_data *get_kexec_setup_data_addr(void)
> +{
> + struct setup_data *data;
> + u64 pa_data;
> +
> + pa_data = boot_params->hdr.setup_data;
> + while (pa_data) {
> + data = (struct setup_data *)pa_data;
> + if (data->type == SETUP_EFI)
> + return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
> +
> + pa_data = data->next;
> + }
> + return NULL;
> +}
> +
> +static acpi_physical_address kexec_get_rsdp_addr(void)
> +{
> + efi_system_table_64_t *systab;
> + struct efi_setup_data *esd;
> + struct efi_info *ei;
> + char *sig;
> +
> + esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
> + if (!esd)
> + return 0;
> +
> + if (!esd->tables) {
> + debug_putstr("Wrong kexec SETUP_EFI data.\n");
> + return 0;
> + }
> +
> + ei = &boot_params->efi_info;
> + sig = (char *)&ei->efi_loader_signature;
> + if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
> + debug_putstr("Wrong kexec EFI loader signature.\n");
> + return 0;
> + }
> +
> + /* Get systab from boot params. */
> + systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
> + if (!systab)
> + error("EFI system table not found in kexec boot_params.");
> +
> + return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
> +}
> +#else
> +static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
> +#endif /* CONFIG_X86_64 */
> +
> +static acpi_physical_address efi_get_rsdp_addr(void)
> +{
> +#ifdef CONFIG_EFI
> + unsigned long systab, config_tables;
> unsigned int nr_tables;
> struct efi_info *ei;
> bool efi_64;
> - int size, i;
> char *sig;
>
> ei = &boot_params->efi_info;
> @@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
>
> config_tables = stbl->tables;
> nr_tables = stbl->nr_tables;
> - size = sizeof(efi_config_table_64_t);
> } else {
> efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
>
> config_tables = stbl->tables;
> nr_tables = stbl->nr_tables;
> - size = sizeof(efi_config_table_32_t);
> }
>
> if (!config_tables)
> error("EFI config tables not found.");
>
> - /* Get EFI tables from systab. */
> - for (i = 0; i < nr_tables; i++) {
> - acpi_physical_address table;
> - efi_guid_t guid;
> -
> - config_tables += size;
> -
> - if (efi_64) {
> - efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
> -
> - guid = tbl->guid;
> - table = tbl->table;
> -
> - if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
> - debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
> - return 0;
> - }
> - } else {
> - efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
> -
> - guid = tbl->guid;
> - table = tbl->table;
> - }
> -
> - if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
> - rsdp_addr = table;
> - else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
> - return table;
> - }
> + return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
> +#else
> + return 0;
> #endif
> - return rsdp_addr;
> }
>
> static u8 compute_checksum(u8 *buffer, u32 length)
> @@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
> if (!pa)
> pa = boot_params->acpi_rsdp_addr;
>
> + /*
> + * Try to get EFI data from setup_data. This can happen when we're a
> + * kexec'ed kernel and kexec(1) has passed all the required EFI info to
> + * us.
> + */
> + if (!pa)
> + pa = kexec_get_rsdp_addr();
> +
> if (!pa)
> pa = efi_get_rsdp_addr();
>
> --
> 2.17.2
>

2019-04-24 09:42:25

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v5 0/2] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel

On Wed, Apr 24, 2019 at 05:29:42PM +0800, Baoquan He wrote:
> In this series, patch 2/2 has dependency on patch 1/1, otherwise
> it may cause system to reset to firmware on some machines.
>
> The patch 2/2 is the version Boris organized:
> http://lkml.kernel.org/r/[email protected]
>
> Patch 1/1 is based on Kairui's v1 patch, and add ACPI tables mapping:
> http://lkml.kernel.org/r/[email protected]
>
> Dave Young confirmed this patchset passed test on his t420 laptop, where
> the system resetting issue caused by patch 2/2 was found.

I guess but this does not give me the warm and fuzzy feeling one week
before the merge window.

So:

https://git.kernel.org/tip/36f0c423552dacaca152324b8e9bda42a6d88865

and we all can relax ourselves and take the next release cycle to test
the hell out of this before reenabling it again.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-04-24 10:23:28

by Baoquan He

[permalink] [raw]
Subject: [PATCH v5 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

From: Kairui Song <[email protected]>

The current code only builds identity mapping for physical memory during
kexec-type loading. The regions reserved by firmware are not covered.
In the next patch, the boot decompressing code of kexec-ed kernel tries
to access EFI systab and ACPI tables, lacking identity mapping for them
will cause error and reset system to firmware.

This error doesn't happen on all systems. Because kexec enables gbpages
to build identity mapping, the EFI systab and ACPI tables could have been
covered if they share the same 1 GB area with physical memory. To make
sure, we should map them always.

So here add mapping for them.

Signed-off-by: Kairui Song <[email protected]>
---
arch/x86/kernel/machine_kexec_64.c | 86 ++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ceba408ea982..77b40c3e28d7 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <linux/vmalloc.h>
+#include <linux/efi.h>

#include <asm/init.h>
#include <asm/pgtable.h>
@@ -29,6 +30,48 @@
#include <asm/setup.h>
#include <asm/set_memory.h>

+#ifdef CONFIG_ACPI
+/**
+ * Used while adding mapping for ACPI tables.
+ * Can be reused when other iomem regions need be mapped
+ */
+struct init_pgtable_data {
+ struct x86_mapping_info *info;
+ pgd_t *level4p;
+};
+
+static int mem_region_callback(struct resource *res, void *arg)
+{
+ struct init_pgtable_data *data = arg;
+ unsigned long mstart, mend;
+
+ mstart = res->start;
+ mend = mstart + resource_size(res) - 1;
+
+ return kernel_ident_mapping_init(data->info,
+ data->level4p, mstart, mend);
+}
+
+static int init_acpi_pgtable(struct x86_mapping_info *info,
+ pgd_t *level4p)
+{
+ unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ struct init_pgtable_data data;
+
+ data.info = info;
+ data.level4p = level4p;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+}
+#else
+static int init_acpi_pgtable(struct x86_mapping_info *info,
+ pgd_t *level4p)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -36,6 +79,37 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
#endif

+#ifdef CONFIG_EFI
+static int init_efi_systab_pgtable(struct x86_mapping_info *info,
+ pgd_t *level4p)
+{
+ unsigned long mstart, mend;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ mstart = (boot_params.efi_info.efi_systab |
+ ((u64)boot_params.efi_info.efi_systab_hi<<32));
+
+ if (efi_enabled(EFI_64BIT))
+ mend = mstart + sizeof(efi_system_table_64_t);
+ else
+ mend = mstart + sizeof(efi_system_table_32_t);
+
+ if (mstart)
+ return kernel_ident_mapping_init(info,
+ level4p, mstart, mend);
+
+ return 0;
+}
+#else
+static inline int init_efi_systab_pgtable(struct x86_mapping_info *info,
+ pgd_t *level4p)
+{
+ return 0;
+}
+#endif
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
@@ -159,6 +233,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
return result;
}

+ /**
+ * Prepare EFI systab and ACPI table mapping for kexec kernel,
+ * since they are not covered by pfn_mapped.
+ */
+ result = init_efi_systab_pgtable(&info, level4p);
+ if (result)
+ return result;
+
+ result = init_acpi_pgtable(&info, level4p);
+ if (result)
+ return result;
+
return init_transition_pgtable(image, level4p);
}

--
2.17.2

2019-04-24 11:07:43

by Baoquan He

[permalink] [raw]
Subject: [PATCH v5 2/2] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels

From: Junichi Nomura <[email protected]>

Commit

3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")

broke kexec boot on EFI systems. efi_get_rsdp_addr() in the early
parsing code tries to search RSDP from the EFI tables but that will
crash because the table address is virtual when the kernel was booted by
kexec (set_virtual_address_map() has run in the first kernel and cannot
be run again in the second kernel).

In the case of kexec, the physical address of EFI tables is provided via
efi_setup_data in boot_params, which is set up by kexec(1).

Factor out the table parsing code and use different pointers depending
on whether the kernel is booted by kexec or not.

[ bp: Massage. ]

Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
Signed-off-by: Jun'ichi Nomura <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Chao Fan <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Young <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Baoquan He <[email protected]>
---
arch/x86/boot/compressed/acpi.c | 143 ++++++++++++++++++++++++--------
1 file changed, 107 insertions(+), 36 deletions(-)

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad55b29b..8cecce1ac0cd 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
return addr;
}

-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+/*
+ * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+ bool efi_64)
{
acpi_physical_address rsdp_addr = 0;

#ifdef CONFIG_EFI
- unsigned long systab, systab_tables, config_tables;
+ int i;
+
+ /* Get EFI tables from systab. */
+ for (i = 0; i < nr_tables; i++) {
+ acpi_physical_address table;
+ efi_guid_t guid;
+
+ if (efi_64) {
+ efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
+
+ guid = tbl->guid;
+ table = tbl->table;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+ debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+ return 0;
+ }
+ } else {
+ efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
+
+ guid = tbl->guid;
+ table = tbl->table;
+ }
+
+ if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+ rsdp_addr = table;
+ else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+ return table;
+ }
+#endif
+ return rsdp_addr;
+}
+
+/* EFI/kexec support is 64-bit only. */
+#ifdef CONFIG_X86_64
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+ struct setup_data *data;
+ u64 pa_data;
+
+ pa_data = boot_params->hdr.setup_data;
+ while (pa_data) {
+ data = (struct setup_data *)pa_data;
+ if (data->type == SETUP_EFI)
+ return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+ pa_data = data->next;
+ }
+ return NULL;
+}
+
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+ efi_system_table_64_t *systab;
+ struct efi_setup_data *esd;
+ struct efi_info *ei;
+ char *sig;
+
+ esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
+ if (!esd)
+ return 0;
+
+ if (!esd->tables) {
+ debug_putstr("Wrong kexec SETUP_EFI data.\n");
+ return 0;
+ }
+
+ ei = &boot_params->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+ if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ debug_putstr("Wrong kexec EFI loader signature.\n");
+ return 0;
+ }
+
+ /* Get systab from boot params. */
+ systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+ if (!systab)
+ error("EFI system table not found in kexec boot_params.");
+
+ return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+#else
+static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
+#endif /* CONFIG_X86_64 */
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+ unsigned long systab, config_tables;
unsigned int nr_tables;
struct efi_info *ei;
bool efi_64;
- int size, i;
char *sig;

ei = &boot_params->efi_info;
@@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)

config_tables = stbl->tables;
nr_tables = stbl->nr_tables;
- size = sizeof(efi_config_table_64_t);
} else {
efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;

config_tables = stbl->tables;
nr_tables = stbl->nr_tables;
- size = sizeof(efi_config_table_32_t);
}

if (!config_tables)
error("EFI config tables not found.");

- /* Get EFI tables from systab. */
- for (i = 0; i < nr_tables; i++) {
- acpi_physical_address table;
- efi_guid_t guid;
-
- config_tables += size;
-
- if (efi_64) {
- efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
- guid = tbl->guid;
- table = tbl->table;
-
- if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
- debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
- return 0;
- }
- } else {
- efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
- guid = tbl->guid;
- table = tbl->table;
- }
-
- if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
- rsdp_addr = table;
- else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
- return table;
- }
+ return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+ return 0;
#endif
- return rsdp_addr;
}

static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
if (!pa)
pa = boot_params->acpi_rsdp_addr;

+ /*
+ * Try to get EFI data from setup_data. This can happen when we're a
+ * kexec'ed kernel and kexec(1) has passed all the required EFI info to
+ * us.
+ */
+ if (!pa)
+ pa = kexec_get_rsdp_addr();
+
if (!pa)
pa = efi_get_rsdp_addr();

--
2.17.2

2019-04-24 13:25:47

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v5 0/2] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel

On 04/24/19 at 11:38am, Borislav Petkov wrote:
> On Wed, Apr 24, 2019 at 05:29:42PM +0800, Baoquan He wrote:
> > In this series, patch 2/2 has dependency on patch 1/1, otherwise
> > it may cause system to reset to firmware on some machines.
> >
> > The patch 2/2 is the version Boris organized:
> > http://lkml.kernel.org/r/[email protected]
> >
> > Patch 1/1 is based on Kairui's v1 patch, and add ACPI tables mapping:
> > http://lkml.kernel.org/r/[email protected]
> >
> > Dave Young confirmed this patchset passed test on his t420 laptop, where
> > the system resetting issue caused by patch 2/2 was found.
>
> I guess but this does not give me the warm and fuzzy feeling one week
> before the merge window.
>
> So:
>
> https://git.kernel.org/tip/36f0c423552dacaca152324b8e9bda42a6d88865
>
> and we all can relax ourselves and take the next release cycle to test
> the hell out of this before reenabling it again.

OK, then let's hold till the next window opening.

2019-04-27 16:12:34

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Wed, Apr 24, 2019 at 05:29:43PM +0800, Baoquan He wrote:
> From: Kairui Song <[email protected]>
>
> The current code only builds identity mapping for physical memory during
> kexec-type loading. The regions reserved by firmware are not covered.
> In the next patch, the boot decompressing code of kexec-ed kernel tries

There's no guarantee that when this patch gets applied, the next patch
will be the one you mean. So explain what you mean here instead.

> to access EFI systab and ACPI tables, lacking identity mapping for them
> will cause error and reset system to firmware.
>
> This error doesn't happen on all systems. Because kexec enables gbpages
> to build identity mapping, the EFI systab and ACPI tables could have been
> covered if they share the same 1 GB area with physical memory. To make
> sure, we should map them always.
>
> So here add mapping for them.
>
> Signed-off-by: Kairui Song <[email protected]>

When you send someone else's patch, you need to add your SOB. Lemme
point you to

Documentation/process/submitting-patches.rst

again. Please have a deeper look.

> ---
> arch/x86/kernel/machine_kexec_64.c | 86 ++++++++++++++++++++++++++++++
> 1 file changed, 86 insertions(+)
>
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index ceba408ea982..77b40c3e28d7 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -18,6 +18,7 @@
> #include <linux/io.h>
> #include <linux/suspend.h>
> #include <linux/vmalloc.h>
> +#include <linux/efi.h>
>
> #include <asm/init.h>
> #include <asm/pgtable.h>
> @@ -29,6 +30,48 @@
> #include <asm/setup.h>
> #include <asm/set_memory.h>
>
> +#ifdef CONFIG_ACPI
> +/**

Two stars '**' are kernel-doc style but this comment is implementation
detail and is irrelevant for kernel-doc ouput.

> + * Used while adding mapping for ACPI tables.
> + * Can be reused when other iomem regions need be mapped
> + */
> +struct init_pgtable_data {
> + struct x86_mapping_info *info;
> + pgd_t *level4p;
> +};
> +
> +static int mem_region_callback(struct resource *res, void *arg)
> +{
> + struct init_pgtable_data *data = arg;
> + unsigned long mstart, mend;
> +
> + mstart = res->start;
> + mend = mstart + resource_size(res) - 1;
> +
> + return kernel_ident_mapping_init(data->info,
> + data->level4p, mstart, mend);

Do not break that line.

> +}
> +
> +static int init_acpi_pgtable(struct x86_mapping_info *info,
> + pgd_t *level4p)

static int
map_acpi_tables(...)

> +{
> + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + struct init_pgtable_data data;
> +
> + data.info = info;
> + data.level4p = level4p;
> + flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> + &data, mem_region_callback);
> +}
> +#else
> +static int init_acpi_pgtable(struct x86_mapping_info *info,
> + pgd_t *level4p)
> +{
> + return 0;
> +}
> +#endif
> +
> #ifdef CONFIG_KEXEC_FILE
> const struct kexec_file_ops * const kexec_file_loaders[] = {
> &kexec_bzImage64_ops,
> @@ -36,6 +79,37 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
> };
> #endif
>
> +#ifdef CONFIG_EFI
> +static int init_efi_systab_pgtable(struct x86_mapping_info *info,
> + pgd_t *level4p)

This function's name is wrong. Make it like this:

static int
map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
{
#ifdef CONFIG_EFI

...

#endif

return 0;
}

and drop the #else ifdeffery.


> +{
> + unsigned long mstart, mend;
> +
> + if (!efi_enabled(EFI_BOOT))
> + return 0;
> +
> + mstart = (boot_params.efi_info.efi_systab |
> + ((u64)boot_params.efi_info.efi_systab_hi<<32));
> +
> + if (efi_enabled(EFI_64BIT))
> + mend = mstart + sizeof(efi_system_table_64_t);
> + else
> + mend = mstart + sizeof(efi_system_table_32_t);
> +
> + if (mstart)
> + return kernel_ident_mapping_init(info,
> + level4p, mstart, mend);

Flip that logic:

if (!mstart)
return 0;

return kernel_ident_mapping_init(info, level4p, mstart, mend);

and let the function stick out.

> +
> + return 0;
> +}
> +#else
> +static inline int init_efi_systab_pgtable(struct x86_mapping_info *info,
> + pgd_t *level4p)
> +{
> + return 0;
> +}
> +#endif
> +
> static void free_transition_pgtable(struct kimage *image)
> {
> free_page((unsigned long)image->arch.p4d);
> @@ -159,6 +233,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
> return result;
> }
>
> + /**

Two stars '**' are kernel-doc style comments above function names but
not here.

> + * Prepare EFI systab and ACPI table mapping for kexec kernel,
> + * since they are not covered by pfn_mapped.
> + */
> + result = init_efi_systab_pgtable(&info, level4p);
> + if (result)
> + return result;
> +
> + result = init_acpi_pgtable(&info, level4p);
> + if (result)
> + return result;
> +
> return init_transition_pgtable(image, level4p);
> }
>
> --
> 2.17.2
>

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-04-28 05:46:47

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 04/27/19 at 06:11pm, Borislav Petkov wrote:
> On Wed, Apr 24, 2019 at 05:29:43PM +0800, Baoquan He wrote:
> > From: Kairui Song <[email protected]>
> >
> > The current code only builds identity mapping for physical memory during
> > kexec-type loading. The regions reserved by firmware are not covered.
> > In the next patch, the boot decompressing code of kexec-ed kernel tries
>
> There's no guarantee that when this patch gets applied, the next patch
> will be the one you mean. So explain what you mean here instead.

All agreed, will update all of them, thanks.

About this place, do you think below change is OK to you?

~~~
The current code only builds identity mapping for physical memory during
kexec-type loading. The regions reserved by firmware are not covered.
In the later code change, the boot decompressing code of kexec-ed kernel
will try to access EFI systab and ACPI tables, lacking identity mapping for
them will cause error and reset system to firmware.

Thanks
Baoquan

> >
> > This error doesn't happen on all systems. Because kexec enables gbpages
> > to build identity mapping, the EFI systab and ACPI tables could have been
> > covered if they share the same 1 GB area with physical memory. To make
> > sure, we should map them always.
> >
> > So here add mapping for them.
> >
> > Signed-off-by: Kairui Song <[email protected]>
>
> When you send someone else's patch, you need to add your SOB. Lemme
> point you to
>
> Documentation/process/submitting-patches.rst
>
> again. Please have a deeper look.
>
> > ---
> > arch/x86/kernel/machine_kexec_64.c | 86 ++++++++++++++++++++++++++++++
> > 1 file changed, 86 insertions(+)
> >
> > diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> > index ceba408ea982..77b40c3e28d7 100644
> > --- a/arch/x86/kernel/machine_kexec_64.c
> > +++ b/arch/x86/kernel/machine_kexec_64.c
> > @@ -18,6 +18,7 @@
> > #include <linux/io.h>
> > #include <linux/suspend.h>
> > #include <linux/vmalloc.h>
> > +#include <linux/efi.h>
> >
> > #include <asm/init.h>
> > #include <asm/pgtable.h>
> > @@ -29,6 +30,48 @@
> > #include <asm/setup.h>
> > #include <asm/set_memory.h>
> >
> > +#ifdef CONFIG_ACPI
> > +/**
>
> Two stars '**' are kernel-doc style but this comment is implementation
> detail and is irrelevant for kernel-doc ouput.
>
> > + * Used while adding mapping for ACPI tables.
> > + * Can be reused when other iomem regions need be mapped
> > + */
> > +struct init_pgtable_data {
> > + struct x86_mapping_info *info;
> > + pgd_t *level4p;
> > +};
> > +
> > +static int mem_region_callback(struct resource *res, void *arg)
> > +{
> > + struct init_pgtable_data *data = arg;
> > + unsigned long mstart, mend;
> > +
> > + mstart = res->start;
> > + mend = mstart + resource_size(res) - 1;
> > +
> > + return kernel_ident_mapping_init(data->info,
> > + data->level4p, mstart, mend);
>
> Do not break that line.
>
> > +}
> > +
> > +static int init_acpi_pgtable(struct x86_mapping_info *info,
> > + pgd_t *level4p)
>
> static int
> map_acpi_tables(...)
>
> > +{
> > + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> > + struct init_pgtable_data data;
> > +
> > + data.info = info;
> > + data.level4p = level4p;
> > + flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> > + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> > + &data, mem_region_callback);
> > +}
> > +#else
> > +static int init_acpi_pgtable(struct x86_mapping_info *info,
> > + pgd_t *level4p)
> > +{
> > + return 0;
> > +}
> > +#endif
> > +
> > #ifdef CONFIG_KEXEC_FILE
> > const struct kexec_file_ops * const kexec_file_loaders[] = {
> > &kexec_bzImage64_ops,
> > @@ -36,6 +79,37 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
> > };
> > #endif
> >
> > +#ifdef CONFIG_EFI
> > +static int init_efi_systab_pgtable(struct x86_mapping_info *info,
> > + pgd_t *level4p)
>
> This function's name is wrong. Make it like this:
>
> static int
> map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
> {
> #ifdef CONFIG_EFI
>
> ...
>
> #endif
>
> return 0;
> }
>
> and drop the #else ifdeffery.
>
>
> > +{
> > + unsigned long mstart, mend;
> > +
> > + if (!efi_enabled(EFI_BOOT))
> > + return 0;
> > +
> > + mstart = (boot_params.efi_info.efi_systab |
> > + ((u64)boot_params.efi_info.efi_systab_hi<<32));
> > +
> > + if (efi_enabled(EFI_64BIT))
> > + mend = mstart + sizeof(efi_system_table_64_t);
> > + else
> > + mend = mstart + sizeof(efi_system_table_32_t);
> > +
> > + if (mstart)
> > + return kernel_ident_mapping_init(info,
> > + level4p, mstart, mend);
>
> Flip that logic:
>
> if (!mstart)
> return 0;
>
> return kernel_ident_mapping_init(info, level4p, mstart, mend);
>
> and let the function stick out.
>
> > +
> > + return 0;
> > +}
> > +#else
> > +static inline int init_efi_systab_pgtable(struct x86_mapping_info *info,
> > + pgd_t *level4p)
> > +{
> > + return 0;
> > +}
> > +#endif
> > +
> > static void free_transition_pgtable(struct kimage *image)
> > {
> > free_page((unsigned long)image->arch.p4d);
> > @@ -159,6 +233,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
> > return result;
> > }
> >
> > + /**
>
> Two stars '**' are kernel-doc style comments above function names but
> not here.
>
> > + * Prepare EFI systab and ACPI table mapping for kexec kernel,
> > + * since they are not covered by pfn_mapped.
> > + */
> > + result = init_efi_systab_pgtable(&info, level4p);
> > + if (result)
> > + return result;
> > +
> > + result = init_acpi_pgtable(&info, level4p);
> > + if (result)
> > + return result;
> > +
> > return init_transition_pgtable(image, level4p);
> > }
> >
> > --
> > 2.17.2
> >
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.

2019-04-29 00:24:30

by Baoquan He

[permalink] [raw]
Subject: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

From: Kairui Song <[email protected]>

The current code only builds identity mapping for physical memory during
kexec-type loading. The regions reserved by firmware are not covered.
In the later patch, the boot decompressing code of kexec-ed kernel tries
to access EFI systab and ACPI tables, lacking identity mapping for them
will cause error and reset system to firmware.

This error doesn't happen on all systems. Because kexec enables gbpages
to build identity mapping, the EFI systab and ACPI tables could have been
covered if they share the same 1 GB area with physical memory. To make
sure, we should map them always.

So here add mapping for them.

Signed-off-by: Kairui Song <[email protected]>
Signed-off-by: Baoquan He <[email protected]>
---
Changelog:
v5->v6:
Tune code, comments and patch log Per Boris's comments.
v5:
This patch was newly added into v5.

arch/x86/kernel/machine_kexec_64.c | 79 ++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ceba408ea982..0af01490ee2d 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <linux/vmalloc.h>
+#include <linux/efi.h>

#include <asm/init.h>
#include <asm/pgtable.h>
@@ -29,6 +30,47 @@
#include <asm/setup.h>
#include <asm/set_memory.h>

+#ifdef CONFIG_ACPI
+/*
+ * Used while adding mapping for ACPI tables.
+ * Can be reused when other iomem regions need be mapped
+ */
+struct init_pgtable_data {
+ struct x86_mapping_info *info;
+ pgd_t *level4p;
+};
+
+static int mem_region_callback(struct resource *res, void *arg)
+{
+ struct init_pgtable_data *data = arg;
+ unsigned long mstart, mend;
+
+ mstart = res->start;
+ mend = mstart + resource_size(res) - 1;
+
+ return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
+}
+
+static int
+map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ struct init_pgtable_data data;
+
+ data.info = info;
+ data.level4p = level4p;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+}
+#else
+static int init_acpi_pgtable(struct x86_mapping_info *info,
+ pgd_t *level4p)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -36,6 +78,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
#endif

+static int
+map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
+{
+#ifdef CONFIG_EFI
+ unsigned long mstart, mend;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ mstart = (boot_params.efi_info.efi_systab |
+ ((u64)boot_params.efi_info.efi_systab_hi<<32));
+
+ if (efi_enabled(EFI_64BIT))
+ mend = mstart + sizeof(efi_system_table_64_t);
+ else
+ mend = mstart + sizeof(efi_system_table_32_t);
+
+ if (!mstart)
+ return 0;
+
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+#endif
+ return 0;
+}
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
@@ -159,6 +226,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
return result;
}

+ /*
+ * Prepare EFI systab and ACPI table mapping for kexec kernel,
+ * since they are not covered by pfn_mapped.
+ */
+ result = map_efi_systab(&info, level4p);
+ if (result)
+ return result;
+
+ result = map_acpi_tables(&info, level4p);
+ if (result)
+ return result;
+
return init_transition_pgtable(image, level4p);
}

--
2.17.2

2019-04-29 12:53:37

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Sun, Apr 28, 2019 at 01:41:14PM +0800, Baoquan He wrote:
> About this place, do you think below change is OK to you?
>
> ~~~
> The current code only builds identity mapping for physical memory during
> kexec-type loading. The regions reserved by firmware are not covered.
> In the later code change, the boot decompressing code of kexec-ed kernel
> will try to access EFI systab and ACPI tables, lacking identity mapping for
> them will cause error and reset system to firmware.

Yap, better.

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-04-29 13:57:05

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Mon, Apr 29, 2019 at 08:23:18AM +0800, Baoquan He wrote:
> +static int
> +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
> +{
> + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + struct init_pgtable_data data;
> +
> + data.info = info;
> + data.level4p = level4p;
> + flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> + &data, mem_region_callback);
> +}
> +#else
> +static int init_acpi_pgtable(struct x86_mapping_info *info,

Did you at least build-test the !CONFIG_ACPI case?

arch/x86/kernel/machine_kexec_64.c: In function ‘init_pgtable’:
arch/x86/kernel/machine_kexec_64.c:237:11: error: implicit declaration of function ‘map_acpi_tables’; did you mean ‘init_acpi_pgtable’? [-Werror=implicit-function-declaration]
result = map_acpi_tables(&info, level4p);
^~~~~~~~~~~~~~~
init_acpi_pgtable


I don't think so. ;-(

Sigh, next time at least build-test your patch before hurrying it out. I
fixed it up along with decyphering the commit message:

---
From: Kairui Song <[email protected]>
Date: Mon, 29 Apr 2019 08:23:18 +0800
Subject: [PATCH] x86/kexec: Add the EFI system tables and ACPI tables to the ident map

Currently, only the whole physical memory is identity-mapped for the
kexec kernel and the regions reserved by firmware are ignored.

However, the recent addition of RSDP parsing in the decompression stage
and especially:

33f0df8d843d ("x86/boot: Search for RSDP in the EFI tables")

which tries to access EFI system tables and to dig out the RDSP address
from there, becomes a problem because in certain configurations, they
might not be mapped in the kexec'ed kernel's address space.

What is more, this problem doesn't appear on all systems because the
kexec kernel uses gigabyte pages to build the identity mapping. And
the EFI system tables and ACPI tables can, depending on the system
configuration, end up being mapped as part of all physical memory, if
they share the same 1 GB area with the physical memory.

Therefore, make sure they're always mapped.

[ bp: productize half-baked patch:
- rewrite commit message.
- s/init_acpi_pgtable/map_acpi_tables/ in the !ACPI case. ]

Signed-off-by: Kairui Song <[email protected]>
Signed-off-by: Baoquan He <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Lianbo Jiang <[email protected]>
Cc: Tetsuo Handa <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: x86-ml <[email protected]>
Link: https://lkml.kernel.org/r/20190429002318.GA25400@MiWiFi-R3L-srv
---
arch/x86/kernel/machine_kexec_64.c | 75 ++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ceba408ea982..3c77bdf7b32a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <linux/vmalloc.h>
+#include <linux/efi.h>

#include <asm/init.h>
#include <asm/pgtable.h>
@@ -29,6 +30,43 @@
#include <asm/setup.h>
#include <asm/set_memory.h>

+#ifdef CONFIG_ACPI
+/*
+ * Used while adding mapping for ACPI tables.
+ * Can be reused when other iomem regions need be mapped
+ */
+struct init_pgtable_data {
+ struct x86_mapping_info *info;
+ pgd_t *level4p;
+};
+
+static int mem_region_callback(struct resource *res, void *arg)
+{
+ struct init_pgtable_data *data = arg;
+ unsigned long mstart, mend;
+
+ mstart = res->start;
+ mend = mstart + resource_size(res) - 1;
+
+ return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
+}
+
+static int
+map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ struct init_pgtable_data data;
+
+ data.info = info;
+ data.level4p = level4p;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+}
+#else
+static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
+#endif
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -36,6 +74,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
#endif

+static int
+map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
+{
+#ifdef CONFIG_EFI
+ unsigned long mstart, mend;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ mstart = (boot_params.efi_info.efi_systab |
+ ((u64)boot_params.efi_info.efi_systab_hi<<32));
+
+ if (efi_enabled(EFI_64BIT))
+ mend = mstart + sizeof(efi_system_table_64_t);
+ else
+ mend = mstart + sizeof(efi_system_table_32_t);
+
+ if (!mstart)
+ return 0;
+
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+#endif
+ return 0;
+}
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
@@ -159,6 +222,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
return result;
}

+ /*
+ * Prepare EFI systab and ACPI tables for kexec kernel since they are
+ * not covered by pfn_mapped.
+ */
+ result = map_efi_systab(&info, level4p);
+ if (result)
+ return result;
+
+ result = map_acpi_tables(&info, level4p);
+ if (result)
+ return result;
+
return init_transition_pgtable(image, level4p);
}

--
2.21.0

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-04-29 14:20:20

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 04/29/19 at 03:55pm, Borislav Petkov wrote:
> On Mon, Apr 29, 2019 at 08:23:18AM +0800, Baoquan He wrote:
> > +static int
> > +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
> > +{
> > + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> > + struct init_pgtable_data data;
> > +
> > + data.info = info;
> > + data.level4p = level4p;
> > + flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> > + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> > + &data, mem_region_callback);
> > +}
> > +#else
> > +static int init_acpi_pgtable(struct x86_mapping_info *info,
>
> Did you at least build-test the !CONFIG_ACPI case?
>
> arch/x86/kernel/machine_kexec_64.c: In function ‘init_pgtable’:
> arch/x86/kernel/machine_kexec_64.c:237:11: error: implicit declaration of function ‘map_acpi_tables’; did you mean ‘init_acpi_pgtable’? [-Werror=implicit-function-declaration]
> result = map_acpi_tables(&info, level4p);
> ^~~~~~~~~~~~~~~
> init_acpi_pgtable
>
>
> I don't think so. ;-(
>
> Sigh, next time at least build-test your patch before hurrying it out. I
> fixed it up along with decyphering the commit message:

Sorry, thought them simple, didn't build !CONFIG_ACPI case. Should be
more careful.

Thanks for fixing it and the log rewriting.

>
> ---
> From: Kairui Song <[email protected]>
> Date: Mon, 29 Apr 2019 08:23:18 +0800
> Subject: [PATCH] x86/kexec: Add the EFI system tables and ACPI tables to the ident map
>
> Currently, only the whole physical memory is identity-mapped for the
> kexec kernel and the regions reserved by firmware are ignored.
>
> However, the recent addition of RSDP parsing in the decompression stage
> and especially:
>
> 33f0df8d843d ("x86/boot: Search for RSDP in the EFI tables")
>
> which tries to access EFI system tables and to dig out the RDSP address
> from there, becomes a problem because in certain configurations, they
> might not be mapped in the kexec'ed kernel's address space.
>
> What is more, this problem doesn't appear on all systems because the
> kexec kernel uses gigabyte pages to build the identity mapping. And
> the EFI system tables and ACPI tables can, depending on the system
> configuration, end up being mapped as part of all physical memory, if
> they share the same 1 GB area with the physical memory.
>
> Therefore, make sure they're always mapped.
>
> [ bp: productize half-baked patch:
> - rewrite commit message.
> - s/init_acpi_pgtable/map_acpi_tables/ in the !ACPI case. ]
>
> Signed-off-by: Kairui Song <[email protected]>
> Signed-off-by: Baoquan He <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: "H. Peter Anvin" <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: "Kirill A. Shutemov" <[email protected]>
> Cc: Lianbo Jiang <[email protected]>
> Cc: Tetsuo Handa <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: x86-ml <[email protected]>
> Link: https://lkml.kernel.org/r/20190429002318.GA25400@MiWiFi-R3L-srv
> ---
> arch/x86/kernel/machine_kexec_64.c | 75 ++++++++++++++++++++++++++++++
> 1 file changed, 75 insertions(+)
>
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index ceba408ea982..3c77bdf7b32a 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -18,6 +18,7 @@
> #include <linux/io.h>
> #include <linux/suspend.h>
> #include <linux/vmalloc.h>
> +#include <linux/efi.h>
>
> #include <asm/init.h>
> #include <asm/pgtable.h>
> @@ -29,6 +30,43 @@
> #include <asm/setup.h>
> #include <asm/set_memory.h>
>
> +#ifdef CONFIG_ACPI
> +/*
> + * Used while adding mapping for ACPI tables.
> + * Can be reused when other iomem regions need be mapped
> + */
> +struct init_pgtable_data {
> + struct x86_mapping_info *info;
> + pgd_t *level4p;
> +};
> +
> +static int mem_region_callback(struct resource *res, void *arg)
> +{
> + struct init_pgtable_data *data = arg;
> + unsigned long mstart, mend;
> +
> + mstart = res->start;
> + mend = mstart + resource_size(res) - 1;
> +
> + return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
> +}
> +
> +static int
> +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
> +{
> + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + struct init_pgtable_data data;
> +
> + data.info = info;
> + data.level4p = level4p;
> + flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> + &data, mem_region_callback);
> +}
> +#else
> +static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
> +#endif
> +
> #ifdef CONFIG_KEXEC_FILE
> const struct kexec_file_ops * const kexec_file_loaders[] = {
> &kexec_bzImage64_ops,
> @@ -36,6 +74,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
> };
> #endif
>
> +static int
> +map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
> +{
> +#ifdef CONFIG_EFI
> + unsigned long mstart, mend;
> +
> + if (!efi_enabled(EFI_BOOT))
> + return 0;
> +
> + mstart = (boot_params.efi_info.efi_systab |
> + ((u64)boot_params.efi_info.efi_systab_hi<<32));
> +
> + if (efi_enabled(EFI_64BIT))
> + mend = mstart + sizeof(efi_system_table_64_t);
> + else
> + mend = mstart + sizeof(efi_system_table_32_t);
> +
> + if (!mstart)
> + return 0;
> +
> + return kernel_ident_mapping_init(info, level4p, mstart, mend);
> +#endif
> + return 0;
> +}
> +
> static void free_transition_pgtable(struct kimage *image)
> {
> free_page((unsigned long)image->arch.p4d);
> @@ -159,6 +222,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
> return result;
> }
>
> + /*
> + * Prepare EFI systab and ACPI tables for kexec kernel since they are
> + * not covered by pfn_mapped.
> + */
> + result = map_efi_systab(&info, level4p);
> + if (result)
> + return result;
> +
> + result = map_acpi_tables(&info, level4p);
> + if (result)
> + return result;
> +
> return init_transition_pgtable(image, level4p);
> }
>
> --
> 2.21.0
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-13 01:58:25

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

Hi Boris,

On 04/29/19 at 03:55pm, Borislav Petkov wrote:
> From: Kairui Song <[email protected]>
> Date: Mon, 29 Apr 2019 08:23:18 +0800
> Subject: [PATCH] x86/kexec: Add the EFI system tables and ACPI tables to the ident map

>
> Currently, only the whole physical memory is identity-mapped for the
> kexec kernel and the regions reserved by firmware are ignored.
>
> However, the recent addition of RSDP parsing in the decompression stage
> and especially:
>
> 33f0df8d843d ("x86/boot: Search for RSDP in the EFI tables")
>
> which tries to access EFI system tables and to dig out the RDSP address
> from there, becomes a problem because in certain configurations, they
> might not be mapped in the kexec'ed kernel's address space.
>
> What is more, this problem doesn't appear on all systems because the
> kexec kernel uses gigabyte pages to build the identity mapping. And
> the EFI system tables and ACPI tables can, depending on the system
> configuration, end up being mapped as part of all physical memory, if
> they share the same 1 GB area with the physical memory.
>
> Therefore, make sure they're always mapped.
>
> [ bp: productize half-baked patch:
> - rewrite commit message.
> - s/init_acpi_pgtable/map_acpi_tables/ in the !ACPI case. ]

Can this patchset be merged, or picked into tip?

Thanks
Baoquan

> Signed-off-by: Kairui Song <[email protected]>
> Signed-off-by: Baoquan He <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: "H. Peter Anvin" <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: "Kirill A. Shutemov" <[email protected]>
> Cc: Lianbo Jiang <[email protected]>
> Cc: Tetsuo Handa <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: x86-ml <[email protected]>
> Link: https://lkml.kernel.org/r/20190429002318.GA25400@MiWiFi-R3L-srv
> ---
> arch/x86/kernel/machine_kexec_64.c | 75 ++++++++++++++++++++++++++++++
> 1 file changed, 75 insertions(+)
>
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index ceba408ea982..3c77bdf7b32a 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -18,6 +18,7 @@
> #include <linux/io.h>
> #include <linux/suspend.h>
> #include <linux/vmalloc.h>
> +#include <linux/efi.h>
>
> #include <asm/init.h>
> #include <asm/pgtable.h>
> @@ -29,6 +30,43 @@
> #include <asm/setup.h>
> #include <asm/set_memory.h>
>
> +#ifdef CONFIG_ACPI
> +/*
> + * Used while adding mapping for ACPI tables.
> + * Can be reused when other iomem regions need be mapped
> + */
> +struct init_pgtable_data {
> + struct x86_mapping_info *info;
> + pgd_t *level4p;
> +};
> +
> +static int mem_region_callback(struct resource *res, void *arg)
> +{
> + struct init_pgtable_data *data = arg;
> + unsigned long mstart, mend;
> +
> + mstart = res->start;
> + mend = mstart + resource_size(res) - 1;
> +
> + return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
> +}
> +
> +static int
> +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
> +{
> + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + struct init_pgtable_data data;
> +
> + data.info = info;
> + data.level4p = level4p;
> + flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> + &data, mem_region_callback);
> +}
> +#else
> +static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
> +#endif
> +
> #ifdef CONFIG_KEXEC_FILE
> const struct kexec_file_ops * const kexec_file_loaders[] = {
> &kexec_bzImage64_ops,
> @@ -36,6 +74,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
> };
> #endif
>
> +static int
> +map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
> +{
> +#ifdef CONFIG_EFI
> + unsigned long mstart, mend;
> +
> + if (!efi_enabled(EFI_BOOT))
> + return 0;
> +
> + mstart = (boot_params.efi_info.efi_systab |
> + ((u64)boot_params.efi_info.efi_systab_hi<<32));
> +
> + if (efi_enabled(EFI_64BIT))
> + mend = mstart + sizeof(efi_system_table_64_t);
> + else
> + mend = mstart + sizeof(efi_system_table_32_t);
> +
> + if (!mstart)
> + return 0;
> +
> + return kernel_ident_mapping_init(info, level4p, mstart, mend);
> +#endif
> + return 0;
> +}
> +
> static void free_transition_pgtable(struct kimage *image)
> {
> free_page((unsigned long)image->arch.p4d);
> @@ -159,6 +222,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
> return result;
> }
>
> + /*
> + * Prepare EFI systab and ACPI tables for kexec kernel since they are
> + * not covered by pfn_mapped.
> + */
> + result = map_efi_systab(&info, level4p);
> + if (result)
> + return result;
> +
> + result = map_acpi_tables(&info, level4p);
> + if (result)
> + return result;
> +
> return init_transition_pgtable(image, level4p);
> }
>
> --
> 2.21.0
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-13 07:42:13

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

Baoquan,

On Mon, May 13, 2019 at 09:43:05AM +0800, Baoquan He wrote:
> Can this patchset be merged, or picked into tip?

what is this thing that happens everytime after a kernel is released and
lasts for approximately 2 weeks?

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-13 08:31:15

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/13/19 at 09:50am, Borislav Petkov wrote:
> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> > This is a critical bug which breaks memory hotplug,
>
> Please concentrate and stop the blabla:
>
> 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
>
> already explains what the deal is. This code was *purposefully* disabled
> because we ran out of time and it broke a couple of machines. Don't make
> me repeat all that - you were on CC on *all* threads and messages!
>
> So we're going to try it again this cycle and if there's no fallout, it
> will go upstream. If not, it will have to be fixed. The usual thing.
>
> And I don't care if Kairui's patch fixes this one problem - judging by
> the fragility of this whole thing, it should be hammered on one more
> cycle on as many boxes as possible to make sure there's no other SNAFUs.
>
> So go test it on more machines instead. I've pushed it here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window

Pingfan has got a machine to reproduce the kexec breakage issue, and
applying these two patches fix it. He planned to paste the test result.
I will ask him to try this branch if he has time, or I can get his
machine to test.

Junichi, also have a try on Boris's branch in NEC's test environment?

Thanks
Baoquan

2019-05-13 08:38:25

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

Hi Dave,

On 05/13/19 at 09:50am, Borislav Petkov wrote:
> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> > This is a critical bug which breaks memory hotplug,
>
> Please concentrate and stop the blabla:
>
> 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
>
> already explains what the deal is. This code was *purposefully* disabled
> because we ran out of time and it broke a couple of machines. Don't make

I remember your machine is the one on whihc the issue is reported. Could
you also test it and confirm if these all things found ealier are
cleared out?

Thanks
Baoquan

> me repeat all that - you were on CC on *all* threads and messages!
>
> So we're going to try it again this cycle and if there's no fallout, it
> will go upstream. If not, it will have to be fixed. The usual thing.
>
> And I don't care if Kairui's patch fixes this one problem - judging by
> the fragility of this whole thing, it should be hammered on one more
> cycle on as many boxes as possible to make sure there's no other SNAFUs.
>
> So go test it on more machines instead. I've pushed it here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-13 09:30:29

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/13/19 at 09:07am, Borislav Petkov wrote:
> Baoquan,
>
> On Mon, May 13, 2019 at 09:43:05AM +0800, Baoquan He wrote:
> > Can this patchset be merged, or picked into tip?
>
> what is this thing that happens everytime after a kernel is released and
> lasts for approximately 2 weeks?

This is a critical bug which breaks memory hotplug, since KASLR is
enabled by default in upstream, and in our distros too. You can see that
Junichi posted the patch after NEC must have tested the code and found
the new issue. And Chao from FJ also worked out the patches to fix the bug.
And I have tracking bugs at hand from other important customers, related
to this fix too. The back porting of Chao's patches into our distros are
blocked by these two. We gonna miss another due date we promised to customers.

Thanks
Baoquan

2019-05-13 09:42:44

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> This is a critical bug which breaks memory hotplug,

Please concentrate and stop the blabla:

36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")

already explains what the deal is. This code was *purposefully* disabled
because we ran out of time and it broke a couple of machines. Don't make
me repeat all that - you were on CC on *all* threads and messages!

So we're going to try it again this cycle and if there's no fallout, it
will go upstream. If not, it will have to be fixed. The usual thing.

And I don't care if Kairui's patch fixes this one problem - judging by
the fragility of this whole thing, it should be hammered on one more
cycle on as many boxes as possible to make sure there's no other SNAFUs.

So go test it on more machines instead. I've pushed it here:

https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-14 03:27:17

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/13/19 at 04:06pm, Baoquan He wrote:
> Hi Dave,
>
> On 05/13/19 at 09:50am, Borislav Petkov wrote:
> > On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> > > This is a critical bug which breaks memory hotplug,
> >
> > Please concentrate and stop the blabla:
> >
> > 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
> >
> > already explains what the deal is. This code was *purposefully* disabled
> > because we ran out of time and it broke a couple of machines. Don't make
>
> I remember your machine is the one on whihc the issue is reported. Could
> you also test it and confirm if these all things found ealier are
> cleared out?
>

I did some tests on the laptop, thing is:
1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
on latest Linus master branch, everything works fine.

2. build and test the tip/next-merge-window branch, kernel hangs early
without output, (both 1st boot and kexec boot)

So I think these 3 patches are good, but there could be other issues
which is not related to the problem we saw.

Another thing is we can move the get rsdp after console_init, but that
can be done later as separate patch.

Thanks
Dave

2019-05-14 03:34:51

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/14/19 at 11:22am, Dave Young wrote:
> On 05/13/19 at 04:06pm, Baoquan He wrote:
> > Hi Dave,
> >
> > On 05/13/19 at 09:50am, Borislav Petkov wrote:
> > > On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> > > > This is a critical bug which breaks memory hotplug,
> > >
> > > Please concentrate and stop the blabla:
> > >
> > > 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
> > >
> > > already explains what the deal is. This code was *purposefully* disabled
> > > because we ran out of time and it broke a couple of machines. Don't make
> >
> > I remember your machine is the one on whihc the issue is reported. Could
> > you also test it and confirm if these all things found ealier are
> > cleared out?
> >
>
> I did some tests on the laptop, thing is:
> 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> on latest Linus master branch, everything works fine.
>
> 2. build and test the tip/next-merge-window branch, kernel hangs early
> without output, (both 1st boot and kexec boot)

Thanks, Dave.

Yeah, I also tested on a HP machine, problem reprodued on the current
master branch when revert commit 52b922c3d49c.

Then apply these two patches, problem solved.

Tried boris's next-merge-window branch too, kexec works very well.

Dirk, Junichi, feel free to add your test result if you have time.

>
> Another thing is we can move the get rsdp after console_init, but that
> can be done later as separate patch.

Yes, agree.

2019-05-14 08:51:40

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/14/19 at 11:22am, Dave Young wrote:
> On 05/13/19 at 04:06pm, Baoquan He wrote:
> > Hi Dave,
> >
> > On 05/13/19 at 09:50am, Borislav Petkov wrote:
> > > On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> > > > This is a critical bug which breaks memory hotplug,
> > >
> > > Please concentrate and stop the blabla:
> > >
> > > 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
> > >
> > > already explains what the deal is. This code was *purposefully* disabled
> > > because we ran out of time and it broke a couple of machines. Don't make
> >
> > I remember your machine is the one on whihc the issue is reported. Could
> > you also test it and confirm if these all things found ealier are
> > cleared out?
> >
>
> I did some tests on the laptop, thing is:
> 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> on latest Linus master branch, everything works fine.
>
> 2. build and test the tip/next-merge-window branch, kernel hangs early
> without output, (both 1st boot and kexec boot)

Update about 2. It should be not early rsdp related, I got the boot log
Since can not reproduce with Linus master branch it may have been fixed.

[ 0.685374][ T1] rcu: Hierarchical SRCU implementation.
[ 0.686414][ T1] general protection fault: 0000 [#1] SMP PTI
[ 0.687328][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc6+ #877
[ 0.687328][ T1] Hardware name: LENOVO 4236NUC/4236NUC, BIOS 83ET82WW (1.52 ) 06/04/2018
[ 0.687328][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
[ 0.687328][ T1] Code: e8 77 49 1a 00 4c 8b 54 24 18 48 85 c0 4c 8b 4c 24 10 48 89 44 24 20 0f 84 68 fe ff ff 4a 8b 0c cd 20 88 07 a7 48 8d 54 24 20 <48> 89 04 11 e9 e5 fd ff ff 85 db 0f 85 67 fe ff ff 45 85 ed 75 24
[ 0.687328][ T1] RSP: 0000:ffffa52100c8bd90 EFLAGS: 00010286
[ 0.687328][ T1] RAX: ffff8c6bd5d03000 RBX: 0000000000000000 RCX: ffff8c6bd6000000
[ 0.687328][ T1] RDX: ffffa52100c8bdb0 RSI: ffffffffa620e209 RDI: ffff8c6bd5d04000
[ 0.687328][ T1] RBP: ffff8c6bd60103a0 R08: ffff8c6bd4da0000 R09: 0000000000000000
[ 0.687328][ T1] R10: ffff8c6bd4e20000 R11: ffffc8e248538800 R12: 00000000000103a0
[ 0.687328][ T1] R13: 0000000000010000 R14: fffffe0000013000 R15: 0000000000000000
[ 0.687328][ T1] FS: 0000000000000000(0000) GS:ffff8c6bd6000000(0000) knlGS:0000000000000000
[ 0.687328][ T1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.687328][ T1] CR2: ffff8c6bde5ff000 CR3: 000000015700e001 CR4: 00000000000606f0
[ 0.687328][ T1] Call Trace:
[ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
[ 0.687328][ T1] x86_reserve_hardware+0x173/0x180
[ 0.687328][ T1] x86_pmu_event_init+0x39/0x220
[ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
[ 0.687328][ T1] perf_try_init_event+0x42/0xd0
[ 0.687328][ T1] perf_event_alloc+0x46a/0x8b0
[ 0.687328][ T1] perf_event_create_kernel_counter+0x21/0x130
[ 0.687328][ T1] hardlockup_detector_event_create+0x39/0x50
[ 0.687328][ T1] hardlockup_detector_perf_init+0xc/0x40
[ 0.687328][ T1] lockup_detector_init+0x3a/0x71
[ 0.687328][ T1] kernel_init_freeable+0xbc/0x231
[ 0.687328][ T1] ? rest_init+0x9f/0x9f
[ 0.687328][ T1] kernel_init+0xa/0x101
[ 0.687328][ T1] ret_from_fork+0x35/0x40
[ 0.687328][ T1] Modules linked in:
[ 0.687331][ T1] ---[ end trace 71ee47f6125e74a4 ]---
[ 0.688331][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
[ 0.689330][ T1] Code: e8 77 49 1a 00 4c 8b 54 24 18 48 85 c0 4c 8b 4c 24 10 48 89 44 24 20 0f 84 68 fe ff ff 4a 8b 0c cd 20 88 07 a7 48 8d 54 24 20 <48> 89 04 11 e9 e5 fd ff ff 85 db 0f 85 67 fe ff ff 45 85 ed 75 24
[ 0.690330][ T1] RSP: 0000:ffffa52100c8bd90 EFLAGS: 00010286
[ 0.691330][ T1] RAX: ffff8c6bd5d03000 RBX: 0000000000000000 RCX: ffff8c6bd6000000
[ 0.692330][ T1] RDX: ffffa52100c8bdb0 RSI: ffffffffa620e209 RDI: ffff8c6bd5d04000
[ 0.693330][ T1] RBP: ffff8c6bd60103a0 R08: ffff8c6bd4da0000 R09: 0000000000000000
[ 0.694330][ T1] R10: ffff8c6bd4e20000 R11: ffffc8e248538800 R12: 00000000000103a0
[ 0.695330][ T1] R13: 0000000000010000 R14: fffffe0000013000 R15: 0000000000000000
[ 0.696330][ T1] FS: 0000000000000000(0000) GS:ffff8c6bd6000000(0000) knlGS:0000000000000000
[ 0.697330][ T1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.698330][ T1] CR2: ffff8c6bde5ff000 CR3: 000000015700e001 CR4: 00000000000606f0
[ 0.699334][ T1] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[ 0.700328][ T1] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

Thanks
Dave

2019-05-14 11:21:39

by Kairui Song

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 14, 2019 at 4:48 PM Dave Young <[email protected]> wrote:
>
> On 05/14/19 at 11:22am, Dave Young wrote:
> > On 05/13/19 at 04:06pm, Baoquan He wrote:
> > > Hi Dave,
> > >
> > > On 05/13/19 at 09:50am, Borislav Petkov wrote:
> > > > On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> > > > > This is a critical bug which breaks memory hotplug,
> > > >
> > > > Please concentrate and stop the blabla:
> > > >
> > > > 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
> > > >
> > > > already explains what the deal is. This code was *purposefully* disabled
> > > > because we ran out of time and it broke a couple of machines. Don't make
> > >
> > > I remember your machine is the one on whihc the issue is reported. Could
> > > you also test it and confirm if these all things found ealier are
> > > cleared out?
> > >
> >
> > I did some tests on the laptop, thing is:
> > 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> > on latest Linus master branch, everything works fine.
> >
> > 2. build and test the tip/next-merge-window branch, kernel hangs early
> > without output, (both 1st boot and kexec boot)
>
> Update about 2. It should be not early rsdp related, I got the boot log
> Since can not reproduce with Linus master branch it may have been fixed.
>
> [ 0.685374][ T1] rcu: Hierarchical SRCU implementation.
> [ 0.686414][ T1] general protection fault: 0000 [#1] SMP PTI
> [ 0.687328][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc6+ #877
> [ 0.687328][ T1] Hardware name: LENOVO 4236NUC/4236NUC, BIOS 83ET82WW (1.52 ) 06/04/2018
> [ 0.687328][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
> [ 0.687328][ T1] Code: e8 77 49 1a 00 4c 8b 54 24 18 48 85 c0 4c 8b 4c 24 10 48 89 44 24 20 0f 84 68 fe ff ff 4a 8b 0c cd 20 88 07 a7 48 8d 54 24 20 <48> 89 04 11 e9 e5 fd ff ff 85 db 0f 85 67 fe ff ff 45 85 ed 75 24
> [ 0.687328][ T1] RSP: 0000:ffffa52100c8bd90 EFLAGS: 00010286
> [ 0.687328][ T1] RAX: ffff8c6bd5d03000 RBX: 0000000000000000 RCX: ffff8c6bd6000000
> [ 0.687328][ T1] RDX: ffffa52100c8bdb0 RSI: ffffffffa620e209 RDI: ffff8c6bd5d04000
> [ 0.687328][ T1] RBP: ffff8c6bd60103a0 R08: ffff8c6bd4da0000 R09: 0000000000000000
> [ 0.687328][ T1] R10: ffff8c6bd4e20000 R11: ffffc8e248538800 R12: 00000000000103a0
> [ 0.687328][ T1] R13: 0000000000010000 R14: fffffe0000013000 R15: 0000000000000000
> [ 0.687328][ T1] FS: 0000000000000000(0000) GS:ffff8c6bd6000000(0000) knlGS:0000000000000000
> [ 0.687328][ T1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 0.687328][ T1] CR2: ffff8c6bde5ff000 CR3: 000000015700e001 CR4: 00000000000606f0
> [ 0.687328][ T1] Call Trace:
> [ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
> [ 0.687328][ T1] x86_reserve_hardware+0x173/0x180
> [ 0.687328][ T1] x86_pmu_event_init+0x39/0x220
> [ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
> [ 0.687328][ T1] perf_try_init_event+0x42/0xd0
> [ 0.687328][ T1] perf_event_alloc+0x46a/0x8b0
> [ 0.687328][ T1] perf_event_create_kernel_counter+0x21/0x130
> [ 0.687328][ T1] hardlockup_detector_event_create+0x39/0x50
> [ 0.687328][ T1] hardlockup_detector_perf_init+0xc/0x40
> [ 0.687328][ T1] lockup_detector_init+0x3a/0x71
> [ 0.687328][ T1] kernel_init_freeable+0xbc/0x231
> [ 0.687328][ T1] ? rest_init+0x9f/0x9f
> [ 0.687328][ T1] kernel_init+0xa/0x101
> [ 0.687328][ T1] ret_from_fork+0x35/0x40
> [ 0.687328][ T1] Modules linked in:
> [ 0.687331][ T1] ---[ end trace 71ee47f6125e74a4 ]---
> [ 0.688331][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
> [ 0.689330][ T1] Code: e8 77 49 1a 00 4c 8b 54 24 18 48 85 c0 4c 8b 4c 24 10 48 89 44 24 20 0f 84 68 fe ff ff 4a 8b 0c cd 20 88 07 a7 48 8d 54 24 20 <48> 89 04 11 e9 e5 fd ff ff 85 db 0f 85 67 fe ff ff 45 85 ed 75 24
> [ 0.690330][ T1] RSP: 0000:ffffa52100c8bd90 EFLAGS: 00010286
> [ 0.691330][ T1] RAX: ffff8c6bd5d03000 RBX: 0000000000000000 RCX: ffff8c6bd6000000
> [ 0.692330][ T1] RDX: ffffa52100c8bdb0 RSI: ffffffffa620e209 RDI: ffff8c6bd5d04000
> [ 0.693330][ T1] RBP: ffff8c6bd60103a0 R08: ffff8c6bd4da0000 R09: 0000000000000000
> [ 0.694330][ T1] R10: ffff8c6bd4e20000 R11: ffffc8e248538800 R12: 00000000000103a0
> [ 0.695330][ T1] R13: 0000000000010000 R14: fffffe0000013000 R15: 0000000000000000
> [ 0.696330][ T1] FS: 0000000000000000(0000) GS:ffff8c6bd6000000(0000) knlGS:0000000000000000
> [ 0.697330][ T1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 0.698330][ T1] CR2: ffff8c6bde5ff000 CR3: 000000015700e001 CR4: 00000000000606f0
> [ 0.699334][ T1] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> [ 0.700328][ T1] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>
> Thanks
> Dave

I can confirm as I got same result on my T420. next-merge-window
branch fails both normal boot and kexec...
I didn't manage to get a working serial console, but the behavior is
the same so should be the same issue.

Also after "git cherry-pick de01951c8d40^..next-merge-window" on
master branch, it worked well, so the patch should be good.

--
Best Regards,
Kairui Song

2019-05-14 11:41:48

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 14, 2019 at 04:48:41PM +0800, Dave Young wrote:

> > I did some tests on the laptop, thing is:
> > 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> > on latest Linus master branch, everything works fine.
> >
> > 2. build and test the tip/next-merge-window branch, kernel hangs early
> > without output, (both 1st boot and kexec boot)
>
> Update about 2. It should be not early rsdp related, I got the boot log
> Since can not reproduce with Linus master branch it may have been fixed.

Nothing was changed here since PTI.

> [ 0.685374][ T1] rcu: Hierarchical SRCU implementation.
> [ 0.686414][ T1] general protection fault: 0000 [#1] SMP PTI
> [ 0.687328][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc6+ #877
> [ 0.687328][ T1] Hardware name: LENOVO 4236NUC/4236NUC, BIOS 83ET82WW (1.52 ) 06/04/2018
> [ 0.687328][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450

> [ 0.687328][ T1] Call Trace:
> [ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
> [ 0.687328][ T1] x86_reserve_hardware+0x173/0x180
> [ 0.687328][ T1] x86_pmu_event_init+0x39/0x220

The DS buffers are special in that they're part of cpu_entrt_area. If
this comes apart it might mean your pagetables are dodgy.

2019-05-14 13:01:30

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/14/19 at 01:38pm, Peter Zijlstra wrote:
> On Tue, May 14, 2019 at 04:48:41PM +0800, Dave Young wrote:
>
> > > I did some tests on the laptop, thing is:
> > > 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> > > on latest Linus master branch, everything works fine.
> > >
> > > 2. build and test the tip/next-merge-window branch, kernel hangs early
> > > without output, (both 1st boot and kexec boot)
> >
> > Update about 2. It should be not early rsdp related, I got the boot log
> > Since can not reproduce with Linus master branch it may have been fixed.
>
> Nothing was changed here since PTI.
>
> > [ 0.685374][ T1] rcu: Hierarchical SRCU implementation.
> > [ 0.686414][ T1] general protection fault: 0000 [#1] SMP PTI
> > [ 0.687328][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc6+ #877
> > [ 0.687328][ T1] Hardware name: LENOVO 4236NUC/4236NUC, BIOS 83ET82WW (1.52 ) 06/04/2018
> > [ 0.687328][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
>
> > [ 0.687328][ T1] Call Trace:
> > [ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
> > [ 0.687328][ T1] x86_reserve_hardware+0x173/0x180
> > [ 0.687328][ T1] x86_pmu_event_init+0x39/0x220
>
> The DS buffers are special in that they're part of cpu_entrt_area. If
> this comes apart it might mean your pagetables are dodgy.

Hmm, it seems caused by some WIP branch patches, I suspect below:
commit 124d6af5a5f559e516ed2c6ea857e889ed293b43
x86/paravirt: Standardize 'insn_buff' variable names

The suspicious line is "per_cpu(insn_buff, cpu) = insn_buff;"

I can help to test if need to try anything, eg. debug patch.

I do not know anything of the pti and ds buffer logic, but below chunk
make the next-merge-window branch booting fine on the laptop.
---
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ad47f6415b17..fa254c576032 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -337,7 +337,7 @@ static int alloc_pebs_buffer(int cpu)
struct debug_store *ds = hwev->ds;
size_t bsiz = x86_pmu.pebs_buffer_size;
int max, node = cpu_to_node(cpu);
- void *buffer, *insn_buff, *cea;
+ void *buffer, *ibuff, *cea;

if (!x86_pmu.pebs)
return 0;
@@ -351,12 +351,12 @@ static int alloc_pebs_buffer(int cpu)
* buffer then.
*/
if (x86_pmu.intel_cap.pebs_format < 2) {
- insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
- if (!insn_buff) {
+ ibuff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuff) {
dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
- per_cpu(insn_buff, cpu) = insn_buff;
+ per_cpu(insn_buff, cpu) = ibuff;
}
hwev->ds_pebs_vaddr = buffer;
/* Update the cpu entry area mapping */

2019-05-14 13:57:42

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 14, 2019 at 08:58:35PM +0800, Dave Young wrote:

> Hmm, it seems caused by some WIP branch patches, I suspect below:

Grmbl.. Ingo, can you zap all those WIP branches, please? They mostly
just get in the way of things. If you want to run them, merge them in a
private branch or something.

> commit 124d6af5a5f559e516ed2c6ea857e889ed293b43
> x86/paravirt: Standardize 'insn_buff' variable names
>
> The suspicious line is "per_cpu(insn_buff, cpu) = insn_buff;"

Yah, unfortunatly per-cpu variables live in the same namespace as normal
variables and so the above is incorrect, because the local @insn_buffer
variable shadows the global per-cpu symbol and very weird things will
happen.

This is of course consistent with C rules, where everything lives in the
same namespace...

2019-05-14 14:10:51

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables


* Dave Young <[email protected]> wrote:

> On 05/14/19 at 01:38pm, Peter Zijlstra wrote:
> > On Tue, May 14, 2019 at 04:48:41PM +0800, Dave Young wrote:
> >
> > > > I did some tests on the laptop, thing is:
> > > > 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> > > > on latest Linus master branch, everything works fine.
> > > >
> > > > 2. build and test the tip/next-merge-window branch, kernel hangs early
> > > > without output, (both 1st boot and kexec boot)
> > >
> > > Update about 2. It should be not early rsdp related, I got the boot log
> > > Since can not reproduce with Linus master branch it may have been fixed.
> >
> > Nothing was changed here since PTI.
> >
> > > [ 0.685374][ T1] rcu: Hierarchical SRCU implementation.
> > > [ 0.686414][ T1] general protection fault: 0000 [#1] SMP PTI
> > > [ 0.687328][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc6+ #877
> > > [ 0.687328][ T1] Hardware name: LENOVO 4236NUC/4236NUC, BIOS 83ET82WW (1.52 ) 06/04/2018
> > > [ 0.687328][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
> >
> > > [ 0.687328][ T1] Call Trace:
> > > [ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
> > > [ 0.687328][ T1] x86_reserve_hardware+0x173/0x180
> > > [ 0.687328][ T1] x86_pmu_event_init+0x39/0x220
> >
> > The DS buffers are special in that they're part of cpu_entrt_area. If
> > this comes apart it might mean your pagetables are dodgy.
>
> Hmm, it seems caused by some WIP branch patches, I suspect below:
> commit 124d6af5a5f559e516ed2c6ea857e889ed293b43
> x86/paravirt: Standardize 'insn_buff' variable names

This commit had a bug which I fixed - could you try the latest -tip?

Thanks,

Ingo

2019-05-15 01:10:24

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 05/14/19 at 04:09pm, Ingo Molnar wrote:
>
> * Dave Young <[email protected]> wrote:
>
> > On 05/14/19 at 01:38pm, Peter Zijlstra wrote:
> > > On Tue, May 14, 2019 at 04:48:41PM +0800, Dave Young wrote:
> > >
> > > > > I did some tests on the laptop, thing is:
> > > > > 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
> > > > > on latest Linus master branch, everything works fine.
> > > > >
> > > > > 2. build and test the tip/next-merge-window branch, kernel hangs early
> > > > > without output, (both 1st boot and kexec boot)
> > > >
> > > > Update about 2. It should be not early rsdp related, I got the boot log
> > > > Since can not reproduce with Linus master branch it may have been fixed.
> > >
> > > Nothing was changed here since PTI.
> > >
> > > > [ 0.685374][ T1] rcu: Hierarchical SRCU implementation.
> > > > [ 0.686414][ T1] general protection fault: 0000 [#1] SMP PTI
> > > > [ 0.687328][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc6+ #877
> > > > [ 0.687328][ T1] Hardware name: LENOVO 4236NUC/4236NUC, BIOS 83ET82WW (1.52 ) 06/04/2018
> > > > [ 0.687328][ T1] RIP: 0010:reserve_ds_buffers+0x34e/0x450
> > >
> > > > [ 0.687328][ T1] Call Trace:
> > > > [ 0.687328][ T1] ? hardlockup_detector_event_create+0x50/0x50
> > > > [ 0.687328][ T1] x86_reserve_hardware+0x173/0x180
> > > > [ 0.687328][ T1] x86_pmu_event_init+0x39/0x220
> > >
> > > The DS buffers are special in that they're part of cpu_entrt_area. If
> > > this comes apart it might mean your pagetables are dodgy.
> >
> > Hmm, it seems caused by some WIP branch patches, I suspect below:
> > commit 124d6af5a5f559e516ed2c6ea857e889ed293b43
> > x86/paravirt: Standardize 'insn_buff' variable names
>
> This commit had a bug which I fixed - could you try the latest -tip?

Will do, but I do not use tip tree often, not sure which branch includes
the fix.

https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/
Is it tip/master or tip/tip?

Thanks
Dave

2019-05-15 05:25:04

by Junichi Nomura

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

Hi Kairui,

On 5/13/19 5:02 PM, Baoquan He wrote:
> On 05/13/19 at 09:50am, Borislav Petkov wrote:
>> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
>> So we're going to try it again this cycle and if there's no fallout, it
>> will go upstream. If not, it will have to be fixed. The usual thing.
>>
>> And I don't care if Kairui's patch fixes this one problem - judging by
>> the fragility of this whole thing, it should be hammered on one more
>> cycle on as many boxes as possible to make sure there's no other SNAFUs.
>>
>> So go test it on more machines instead. I've pushed it here:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window
>
> Pingfan has got a machine to reproduce the kexec breakage issue, and
> applying these two patches fix it. He planned to paste the test result.
> I will ask him to try this branch if he has time, or I can get his
> machine to test.
>
> Junichi, also have a try on Boris's branch in NEC's test environment?

while the patch set works on most of the machines I'm testing around,
I found kexec(1) fails to load kernel on a few machines if this patch
is applied. Those machines don't have IORES_DESC_ACPI_TABLES region
and have ACPI tables in IORES_DESC_ACPI_NV_STORAGE region instead.

So I think map_acpi_tables() should try to map both regions. I tried
following change in addition and it worked.

--
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.


diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 3c77bdf..3837c4a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -56,12 +56,22 @@ static int mem_region_callback(struct resource *res, void *arg)
{
unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
struct init_pgtable_data data;
+ int ret;

data.info = info;
data.level4p = level4p;
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
- &data, mem_region_callback);
+ ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+ if (ret && ret != -EINVAL)
+ return ret;
+
+ ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1,
+ &data, mem_region_callback);
+ if (ret && ret != -EINVAL)
+ return ret;
+
+ return 0;
}
#else
static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }

2019-05-15 06:47:28

by Junichi Nomura

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 5/15/19 10:08 AM, Dave Young wrote:
> On 05/14/19 at 04:09pm, Ingo Molnar wrote:
>>> Hmm, it seems caused by some WIP branch patches, I suspect below:
>>> commit 124d6af5a5f559e516ed2c6ea857e889ed293b43
>>> x86/paravirt: Standardize 'insn_buff' variable names
>>
>> This commit had a bug which I fixed - could you try the latest -tip?
>
> Will do, but I do not use tip tree often, not sure which branch includes
> the fix.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/
> Is it tip/master or tip/tip?

Just in case, when I tried tip/master, one of test machines crashed
in the same way as:
https://lkml.org/lkml/2019/5/9/182

and I found this patch was needed:
[PATCH] x86: intel_epb: Take CONFIG_PM into account
https://lore.kernel.org/lkml/3431308.1mSSVdqTRr@kreacher/

--
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

2019-05-15 07:00:17

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Wed, May 15, 2019 at 05:17:19AM +0000, Junichi Nomura wrote:
> Hi Kairui,
>
> On 5/13/19 5:02 PM, Baoquan He wrote:
> > On 05/13/19 at 09:50am, Borislav Petkov wrote:
> >> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> >> So we're going to try it again this cycle and if there's no fallout, it
> >> will go upstream. If not, it will have to be fixed. The usual thing.
> >>
> >> And I don't care if Kairui's patch fixes this one problem - judging by
> >> the fragility of this whole thing, it should be hammered on one more
> >> cycle on as many boxes as possible to make sure there's no other SNAFUs.
> >>
> >> So go test it on more machines instead. I've pushed it here:
> >>
> >> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window
> >
> > Pingfan has got a machine to reproduce the kexec breakage issue, and
> > applying these two patches fix it. He planned to paste the test result.
> > I will ask him to try this branch if he has time, or I can get his
> > machine to test.
> >
> > Junichi, also have a try on Boris's branch in NEC's test environment?
>
> while the patch set works on most of the machines I'm testing around,
> I found kexec(1) fails to load kernel on a few machines if this patch
> is applied. Those machines don't have IORES_DESC_ACPI_TABLES region
> and have ACPI tables in IORES_DESC_ACPI_NV_STORAGE region instead.

Why? What kind of machines are those?

Why are the ACPI tables in NV storage?

Looking at crash_setup_memmap_entries(), it already maps that type so I
guess this is needed.

+ Rafael and leaving in the rest for reference.


> So I think map_acpi_tables() should try to map both regions. I tried
> following change in addition and it worked.
>
> --
> Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.
>
>
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index 3c77bdf..3837c4a 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -56,12 +56,22 @@ static int mem_region_callback(struct resource *res, void *arg)
> {
> unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> struct init_pgtable_data data;
> + int ret;
>
> data.info = info;
> data.level4p = level4p;
> flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> - return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> - &data, mem_region_callback);
> + ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
> + &data, mem_region_callback);
> + if (ret && ret != -EINVAL)
> + return ret;
> +
> + ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1,
> + &data, mem_region_callback);
> + if (ret && ret != -EINVAL)
> + return ret;
> +
> + return 0;
> }
> #else
> static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-15 07:12:15

by Junichi Nomura

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 5/15/19 3:58 PM, Borislav Petkov wrote:
> On Wed, May 15, 2019 at 05:17:19AM +0000, Junichi Nomura wrote:
>> Hi Kairui,
>>
>> On 5/13/19 5:02 PM, Baoquan He wrote:
>>> On 05/13/19 at 09:50am, Borislav Petkov wrote:
>>>> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
>>>> So we're going to try it again this cycle and if there's no fallout, it
>>>> will go upstream. If not, it will have to be fixed. The usual thing.
>>>>
>>>> And I don't care if Kairui's patch fixes this one problem - judging by
>>>> the fragility of this whole thing, it should be hammered on one more
>>>> cycle on as many boxes as possible to make sure there's no other SNAFUs.
>>>>
>>>> So go test it on more machines instead. I've pushed it here:
>>>>
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window
>>>
>>> Pingfan has got a machine to reproduce the kexec breakage issue, and
>>> applying these two patches fix it. He planned to paste the test result.
>>> I will ask him to try this branch if he has time, or I can get his
>>> machine to test.
>>>
>>> Junichi, also have a try on Boris's branch in NEC's test environment?
>>
>> while the patch set works on most of the machines I'm testing around,
>> I found kexec(1) fails to load kernel on a few machines if this patch
>> is applied. Those machines don't have IORES_DESC_ACPI_TABLES region
>> and have ACPI tables in IORES_DESC_ACPI_NV_STORAGE region instead.
>
> Why? What kind of machines are those?

I don't know. They are just general purpose Xeon-based servers
and not some special purpose machines. So I guess there are other
such machines in the wild.

> Why are the ACPI tables in NV storage?
>
> Looking at crash_setup_memmap_entries(), it already maps that type so I
> guess this is needed.
>
> + Rafael and leaving in the rest for reference.
>
>
>> So I think map_acpi_tables() should try to map both regions. I tried
>> following change in addition and it worked.
>>
>> --
>> Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.
>>
>>
>> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
>> index 3c77bdf..3837c4a 100644
>> --- a/arch/x86/kernel/machine_kexec_64.c
>> +++ b/arch/x86/kernel/machine_kexec_64.c
>> @@ -56,12 +56,22 @@ static int mem_region_callback(struct resource *res, void *arg)
>> {
>> unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
>> struct init_pgtable_data data;
>> + int ret;
>>
>> data.info = info;
>> data.level4p = level4p;
>> flags = IORESOURCE_MEM | IORESOURCE_BUSY;
>> - return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
>> - &data, mem_region_callback);
>> + ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
>> + &data, mem_region_callback);
>> + if (ret && ret != -EINVAL)
>> + return ret;
>> +
>> + ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1,
>> + &data, mem_region_callback);
>> + if (ret && ret != -EINVAL)
>> + return ret;
>> +
>> + return 0;
>> }
>> #else
>> static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }

--
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

2019-05-17 13:51:55

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 14, 2019 at 11:22:08AM +0800, Dave Young wrote:
> Another thing is we can move the get rsdp after console_init, but that
> can be done later as separate patch.

https://lkml.kernel.org/r/[email protected]

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-17 13:53:53

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH] x86/boot: Call get_rsdp_addr() after console_init()

And now as a proper patch:

---
From: Borislav Petkov <[email protected]>

... so that early debugging output from the RSDP parsing code can be
visible and collected.

Suggested-by: Dave Young <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Baoquan He <[email protected]>
Cc: Chao Fan <[email protected]>
Cc: Jun'ichi Nomura <[email protected]>
Cc: Kairui Song <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
arch/x86/boot/compressed/misc.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..24e65a0f756d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
/* Clear flags intended for solely in-kernel use. */
boot_params->hdr.loadflags &= ~KASLR_FLAG;

- /* Save RSDP address for later use. */
- boot_params->acpi_rsdp_addr = get_rsdp_addr();
-
sanitize_boot_params(boot_params);

if (boot_params->screen_info.orig_video_mode == 7) {
@@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
cols = boot_params->screen_info.orig_video_cols;

console_init();
+
+ /*
+ * Save RSDP address for later use. Have this after console_init()
+ * so that early debugging output from the RSDP parsing code can be
+ * collected.
+ */
+ boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
debug_putstr("early console in extract_kernel\n");

free_mem_ptr = heap; /* Heap */
--
2.21.0


--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2019-05-21 09:06:31

by Kairui Song

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Wed, May 15, 2019 at 3:10 PM Junichi Nomura <[email protected]> wrote:
>
> On 5/15/19 3:58 PM, Borislav Petkov wrote:
> > On Wed, May 15, 2019 at 05:17:19AM +0000, Junichi Nomura wrote:
> >> Hi Kairui,
> >>
> >> On 5/13/19 5:02 PM, Baoquan He wrote:
> >>> On 05/13/19 at 09:50am, Borislav Petkov wrote:
> >>>> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
> >>>> So we're going to try it again this cycle and if there's no fallout, it
> >>>> will go upstream. If not, it will have to be fixed. The usual thing.
> >>>>
> >>>> And I don't care if Kairui's patch fixes this one problem - judging by
> >>>> the fragility of this whole thing, it should be hammered on one more
> >>>> cycle on as many boxes as possible to make sure there's no other SNAFUs.
> >>>>
> >>>> So go test it on more machines instead. I've pushed it here:
> >>>>
> >>>> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/log/?h=next-merge-window
> >>>
> >>> Pingfan has got a machine to reproduce the kexec breakage issue, and
> >>> applying these two patches fix it. He planned to paste the test result.
> >>> I will ask him to try this branch if he has time, or I can get his
> >>> machine to test.
> >>>
> >>> Junichi, also have a try on Boris's branch in NEC's test environment?
> >>
> >> while the patch set works on most of the machines I'm testing around,
> >> I found kexec(1) fails to load kernel on a few machines if this patch
> >> is applied. Those machines don't have IORES_DESC_ACPI_TABLES region
> >> and have ACPI tables in IORES_DESC_ACPI_NV_STORAGE region instead.
> >
> > Why? What kind of machines are those?
>
> I don't know. They are just general purpose Xeon-based servers
> and not some special purpose machines. So I guess there are other
> such machines in the wild.
>

Hi, I think it's reasonable to update the patch to include the
NV_STORAGE regions as well, most likely the firmware only provided
NV_STORAGE region? Can you help confirm if the e820 didn't contain
ACPI data, and only ACPI NVS?

I had a try with this update patch, it worked and didn't break anything.

Hi Boris, would you prefer to just fold Junichi update patch into the
previous one or I should send an updated patch?


--
Best Regards,
Kairui Song

2019-05-21 09:30:09

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86/boot: Call get_rsdp_addr() after console_init()

On 05/17/19 at 03:50pm, Borislav Petkov wrote:
> And now as a proper patch:
>
> ---
> From: Borislav Petkov <[email protected]>
>
> ... so that early debugging output from the RSDP parsing code can be
> visible and collected.
>
> Suggested-by: Dave Young <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: Baoquan He <[email protected]>
> Cc: Chao Fan <[email protected]>
> Cc: Jun'ichi Nomura <[email protected]>
> Cc: Kairui Song <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> ---
> arch/x86/boot/compressed/misc.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index c0d6c560df69..24e65a0f756d 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> /* Clear flags intended for solely in-kernel use. */
> boot_params->hdr.loadflags &= ~KASLR_FLAG;
>
> - /* Save RSDP address for later use. */
> - boot_params->acpi_rsdp_addr = get_rsdp_addr();
> -
> sanitize_boot_params(boot_params);
>
> if (boot_params->screen_info.orig_video_mode == 7) {
> @@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> cols = boot_params->screen_info.orig_video_cols;
>
> console_init();
> +
> + /*
> + * Save RSDP address for later use. Have this after console_init()
> + * so that early debugging output from the RSDP parsing code can be
> + * collected.
> + */
> + boot_params->acpi_rsdp_addr = get_rsdp_addr();
> +
> debug_putstr("early console in extract_kernel\n");

Thanks for making this. FWIW,

Reviewed-by: Baoquan He <[email protected]>

2019-05-21 10:49:27

by Junichi Nomura

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On 2019/05/21 18:02, Kairui Song wrote:
> On Wed, May 15, 2019 at 3:10 PM Junichi Nomura <[email protected]> wrote:
>> On 5/15/19 3:58 PM, Borislav Petkov wrote:
>>> On Wed, May 15, 2019 at 05:17:19AM +0000, Junichi Nomura wrote:
>>>> I found kexec(1) fails to load kernel on a few machines if this patch
>>>> is applied. Those machines don't have IORES_DESC_ACPI_TABLES region
>>>> and have ACPI tables in IORES_DESC_ACPI_NV_STORAGE region instead.
>>>
>>> Why? What kind of machines are those?
>>
>> I don't know. They are just general purpose Xeon-based servers
>> and not some special purpose machines. So I guess there are other
>> such machines in the wild.
>
> Hi, I think it's reasonable to update the patch to include the
> NV_STORAGE regions as well, most likely the firmware only provided
> NV_STORAGE region? Can you help confirm if the e820 didn't contain
> ACPI data, and only ACPI NVS?

Yes, those machines only have ACPI NVS region as far as I see from kernel log.

> I had a try with this update patch, it worked and didn't break anything.
>
> Hi Boris, would you prefer to just fold Junichi update patch into the
> previous one or I should send an updated patch?

--
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

2019-05-21 18:10:27

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 21, 2019 at 05:02:59PM +0800, Kairui Song wrote:
> Hi Boris, would you prefer to just fold Junichi update patch into the
> previous one or I should send an updated patch?

Please send a patch ontop after Ingo queues your old one, which should
happen soon. This way it would also document the fact that there are
machines with NVS regions only.

Thx.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply. Srsly.

2019-05-21 21:57:47

by Dirk van der Merwe

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables


On 5/13/19 8:33 PM, Baoquan He wrote:
> On 05/14/19 at 11:22am, Dave Young wrote:
>> On 05/13/19 at 04:06pm, Baoquan He wrote:
>>> Hi Dave,
>>>
>>> On 05/13/19 at 09:50am, Borislav Petkov wrote:
>>>> On Mon, May 13, 2019 at 03:32:54PM +0800, Baoquan He wrote:
>>>>> This is a critical bug which breaks memory hotplug,
>>>> Please concentrate and stop the blabla:
>>>>
>>>> 36f0c423552d ("x86/boot: Disable RSDP parsing temporarily")
>>>>
>>>> already explains what the deal is. This code was *purposefully* disabled
>>>> because we ran out of time and it broke a couple of machines. Don't make
>>> I remember your machine is the one on whihc the issue is reported. Could
>>> you also test it and confirm if these all things found ealier are
>>> cleared out?
>>>
>> I did some tests on the laptop, thing is:
>> 1. apply the 3 patches (two you posted + Boris's revert commit 52b922c3d49c)
>> on latest Linus master branch, everything works fine.
>>
>> 2. build and test the tip/next-merge-window branch, kernel hangs early
>> without output, (both 1st boot and kexec boot)
> Thanks, Dave.
>
> Yeah, I also tested on a HP machine, problem reprodued on the current
> master branch when revert commit 52b922c3d49c.
>
> Then apply these two patches, problem solved.
>
> Tried boris's next-merge-window branch too, kexec works very well.
>
> Dirk, Junichi, feel free to add your test result if you have time.


I tested this with the patches (plus revert of the workaround) applied
against stable 5.1 and it works fine for me there. Thanks!

Where can I find the next-merge-window tree?

I can test against that too.


Best regards

Dirk

2019-05-21 23:05:22

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 21, 2019 at 02:53:52PM -0700, Dirk van der Merwe wrote:
> Where can I find the next-merge-window tree?
>
> I can test against that too.

It'll appear soon in a tip branch. I'd appreciate if you tested that
instead - stay tuned...

Thx.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply. Srsly.

2019-05-28 02:52:15

by Kairui Song

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Wed, May 22, 2019 at 2:09 AM Borislav Petkov <[email protected]> wrote:
>
> On Tue, May 21, 2019 at 05:02:59PM +0800, Kairui Song wrote:
> > Hi Boris, would you prefer to just fold Junichi update patch into the
> > previous one or I should send an updated patch?
>
> Please send a patch ontop after Ingo queues your old one, which should
> happen soon. This way it would also document the fact that there are
> machines with NVS regions only.
>
> Thx.
>

Hi, by now, I still didn't see any tip branch pick up this patch yet,
any update?

--
Best Regards,
Kairui Song

2019-06-06 20:18:35

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v6 1/2] x86/kexec: Build identity mapping for EFI systab and ACPI tables

On Tue, May 28, 2019 at 10:49:54AM +0800, Kairui Song wrote:
> Hi, by now, I still didn't see any tip branch pick up this patch yet,
> any update?

Ok, stuff is queued in tip:x86/boot now. Please test it as much as you
can and send all fixes ontop.

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Subject: [tip:x86/boot] x86/kexec: Add the EFI system tables and ACPI tables to the ident map

Commit-ID: 6bbeb276b71f06c5267bfd154629b1bec82e7136
Gitweb: https://git.kernel.org/tip/6bbeb276b71f06c5267bfd154629b1bec82e7136
Author: Kairui Song <[email protected]>
AuthorDate: Mon, 29 Apr 2019 08:23:18 +0800
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 6 Jun 2019 20:13:48 +0200

x86/kexec: Add the EFI system tables and ACPI tables to the ident map

Currently, only the whole physical memory is identity-mapped for the
kexec kernel and the regions reserved by firmware are ignored.

However, the recent addition of RSDP parsing in the decompression stage
and especially:

33f0df8d843d ("x86/boot: Search for RSDP in the EFI tables")

which tries to access EFI system tables and to dig out the RDSP address
from there, becomes a problem because in certain configurations, they
might not be mapped in the kexec'ed kernel's address space.

What is more, this problem doesn't appear on all systems because the
kexec kernel uses gigabyte pages to build the identity mapping. And
the EFI system tables and ACPI tables can, depending on the system
configuration, end up being mapped as part of all physical memory, if
they share the same 1 GB area with the physical memory.

Therefore, make sure they're always mapped.

[ bp: productize half-baked patch:
- rewrite commit message.
- correct the map_acpi_tables() function name in the !ACPI case. ]

Signed-off-by: Kairui Song <[email protected]>
Signed-off-by: Baoquan He <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Tested-by: Dirk van der Merwe <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Lianbo Jiang <[email protected]>
Cc: Tetsuo Handa <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: x86-ml <[email protected]>
Link: https://lkml.kernel.org/r/20190429002318.GA25400@MiWiFi-R3L-srv
---
arch/x86/kernel/machine_kexec_64.c | 75 ++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ceba408ea982..3c77bdf7b32a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <linux/vmalloc.h>
+#include <linux/efi.h>

#include <asm/init.h>
#include <asm/pgtable.h>
@@ -29,6 +30,43 @@
#include <asm/setup.h>
#include <asm/set_memory.h>

+#ifdef CONFIG_ACPI
+/*
+ * Used while adding mapping for ACPI tables.
+ * Can be reused when other iomem regions need be mapped
+ */
+struct init_pgtable_data {
+ struct x86_mapping_info *info;
+ pgd_t *level4p;
+};
+
+static int mem_region_callback(struct resource *res, void *arg)
+{
+ struct init_pgtable_data *data = arg;
+ unsigned long mstart, mend;
+
+ mstart = res->start;
+ mend = mstart + resource_size(res) - 1;
+
+ return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
+}
+
+static int
+map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ struct init_pgtable_data data;
+
+ data.info = info;
+ data.level4p = level4p;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ return walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+}
+#else
+static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
+#endif
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -36,6 +74,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
#endif

+static int
+map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
+{
+#ifdef CONFIG_EFI
+ unsigned long mstart, mend;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ mstart = (boot_params.efi_info.efi_systab |
+ ((u64)boot_params.efi_info.efi_systab_hi<<32));
+
+ if (efi_enabled(EFI_64BIT))
+ mend = mstart + sizeof(efi_system_table_64_t);
+ else
+ mend = mstart + sizeof(efi_system_table_32_t);
+
+ if (!mstart)
+ return 0;
+
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+#endif
+ return 0;
+}
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
@@ -159,6 +222,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
return result;
}

+ /*
+ * Prepare EFI systab and ACPI tables for kexec kernel since they are
+ * not covered by pfn_mapped.
+ */
+ result = map_efi_systab(&info, level4p);
+ if (result)
+ return result;
+
+ result = map_acpi_tables(&info, level4p);
+ if (result)
+ return result;
+
return init_transition_pgtable(image, level4p);
}