2021-10-30 03:25:22

by Liao, Chang

[permalink] [raw]
Subject: [PATCH 0/3] riscv: kexec: add kexec_file_load() support

This patchset implement kexec_file_load() support on riscv, Most of the
code is based on the kexec-tool-patch repo developed by Nick Kossifids.

This patch series enables us to load the riscv vmlinux by specifying
its file decriptor, instead of user-filled buffer via kexec_file_load()
syscall.

Contrary to kexec_load() system call, we reuse the dt blob of the first
kernel to the 2nd explicitly.

To use kexec_file_load() system call, instead of kexec_load(), at kexec
command, '-s' options must be specified. The patch for kexec_tools has
to be apply to riscv architecture source like this:

int elf_riscv_load(int argc, char **argv, const char *buf, off_t len,
...
if (info->file_mode) {
return prepare_kexec_file_options(info);
}
...

Add following routine to prepare cmdline_ptr, cmdline_len and initrd_fd
for syscall kexec_file_load:

int prepare_kexec_file_options(struct kexec_info *info)
{
int fd;
ssize_t result;
struct stat stats;

if (arch_options.cmdline) {
info->command_line = (char *)arch_options.cmdline;
info->command_line_len = strlen(info->command_line) + 1;
}

if (!arch_options.initrd_path) {
info->initrd_fd = -1;
return 0;
}

fd = open(arch_options.initrd_path, O_RDONLY | _O_BINARY);
if (fd < 0) {
fprintf(stderr, "Cannot open `%s': %s\n", arch_options.initrd_path,
strerror(errno));
return -EINVAL;
}
result = fstat(fd, &stats);
if (result < 0) {
close(fd);
fprintf(stderr, "Cannot stat: %s: %s\n", arch_options.initrd_path,
strerror(errno));
return -EINVAL;
}
info->initrd_fd = fd;
return 0;
}

The basic usage of kexec_file is:
1) Reload capture kernel image:
$ kexec -s -l <riscv-vmlinux> --reuse-cmdline

2) Startup capture kernel:
$ kexec -e

For future work:
* Support for kdump and purgatory.
* Support for kernel image verification.
* Support physical address randomization.

Liao Chang (3):
kexec_file: Fix kexec_file.c build error for riscv platform
RISC-V: use memcpy for kexec_file mode
RISC-V: Add kexec_file support

arch/riscv/Kconfig | 11 ++
arch/riscv/include/asm/kexec.h | 4 +
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/elf_kexec.c | 189 +++++++++++++++++++++++++
arch/riscv/kernel/machine_kexec.c | 5 +-
arch/riscv/kernel/machine_kexec_file.c | 14 ++
include/linux/kexec.h | 2 +-
kernel/kexec_file.c | 4 +-
8 files changed, 226 insertions(+), 4 deletions(-)
create mode 100644 arch/riscv/kernel/elf_kexec.c
create mode 100644 arch/riscv/kernel/machine_kexec_file.c

--
2.17.1


2021-10-30 03:28:45

by Liao, Chang

[permalink] [raw]
Subject: [PATCH 3/3] RISC-V: Add kexec_file support

This patch adds support for kexec_file on RISC-V. I tested it on riscv64
QEMU with busybear-linux and single core along with the OpenSBI firmware
fw_jump.bin for generic platform.

On SMP system, it depends on CONFIG_{HOTPLUG_CPU, RISCV_SBI} to
resume/stop hart through OpenSBI firmware, it also needs a OpenSBI that
support the HSM extension.

Signed-off-by: Liao Chang <[email protected]>
---
arch/riscv/Kconfig | 11 ++
arch/riscv/include/asm/kexec.h | 4 +
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/elf_kexec.c | 180 +++++++++++++++++++++++++
arch/riscv/kernel/machine_kexec_file.c | 14 ++
5 files changed, 210 insertions(+)
create mode 100644 arch/riscv/kernel/elf_kexec.c
create mode 100644 arch/riscv/kernel/machine_kexec_file.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 301a54233c7e..5ebe8272dec7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -410,6 +410,17 @@ config KEXEC

The name comes from the similarity to the exec system call.

+config KEXEC_FILE
+ bool "kexec file based systmem call"
+ select KEXEC_CORE
+ select KEXEC_ELF
+ select HAVE_IMA_KEXEC if IMA
+ help
+ This is new version of kexec system call. This system call is
+ file based and takes file descriptors as system call argument
+ for kernel and initramfs as opposed to list of segments as
+ accepted by previous system call.
+
config CRASH_DUMP
bool "Build kdump crash kernel"
help
diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index e4e291d40759..206217b23301 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -53,4 +53,8 @@ typedef void (*riscv_kexec_method)(unsigned long first_ind_entry,

extern riscv_kexec_method riscv_kexec_norelocate;

+#ifdef CONFIG_KEXEC_FILE
+extern const struct kexec_file_ops elf_kexec_ops;
+#endif
+
#endif
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 3397ddac1a30..8a27639d512a 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -60,6 +60,7 @@ endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o

obj-$(CONFIG_JUMP_LABEL) += jump_label.o
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
new file mode 100644
index 000000000000..269be9f9edb8
--- /dev/null
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang ([email protected])
+ *
+ * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
+ * for kernel.
+ */
+
+#define pr_fmt(fmt) "kexec_image: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+
+static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+ struct kexec_elf_info *elf_info, unsigned long old_pbase,
+ unsigned long new_pbase)
+{
+ int i;
+ int ret = 0;
+ size_t size;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+
+ kbuf.image = image;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Go through the available phsyical memory regions and find one that hold
+ * an image of the specified size.
+ */
+static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
+ struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
+ unsigned long *old_pbase, unsigned long *new_pbase)
+{
+ int i;
+ int ret;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+ unsigned long lowest_paddr = ULONG_MAX;
+ unsigned long lowest_vaddr = ULONG_MAX;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ if (lowest_paddr > phdr->p_paddr)
+ lowest_paddr = phdr->p_paddr;
+
+ if (lowest_vaddr > phdr->p_vaddr)
+ lowest_vaddr = phdr->p_vaddr;
+ }
+
+ kbuf.image = image;
+ kbuf.buf_min = lowest_paddr;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
+ kbuf.top_down = false;
+ ret = arch_kexec_locate_mem_hole(&kbuf);
+ if (!ret) {
+ *old_pbase = lowest_paddr;
+ *new_pbase = kbuf.mem;
+ image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
+ }
+ return ret;
+}
+
+static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long old_kernel_pbase = ULONG_MAX;
+ unsigned long new_kernel_pbase = 0UL;
+ unsigned long initrd_pbase = 0UL;
+ void *fdt;
+ struct elfhdr ehdr;
+ struct kexec_buf kbuf;
+ struct kexec_elf_info elf_info;
+
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
+ &old_kernel_pbase, &new_kernel_pbase);
+ if (ret)
+ goto out;
+ pr_notice("The entry point of kernel at 0x%lx\n", image->start);
+
+ /* Add the kernel binary to the image */
+ ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
+ old_kernel_pbase, new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ kbuf.image = image;
+ kbuf.buf_min = new_kernel_pbase + kernel_len;
+ kbuf.buf_max = ULONG_MAX;
+ /* Add the initrd to the image */
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_pbase = kbuf.mem;
+ pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase);
+ }
+
+ /* Add the DTB to the image */
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
+ initrd_len, cmdline, 0);
+ if (!fdt) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fdt_pack(fdt);
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add DTB kbuf ret=%d\n", ret);
+ goto out_free_fdt;
+ }
+ pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem);
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
+out:
+ kexec_free_elf_info(&elf_info);
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops elf_kexec_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf_kexec_load,
+};
diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
new file mode 100644
index 000000000000..b0bf8c1722c0
--- /dev/null
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec_file for riscv, use vmlinux as the dump-capture kernel image.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang ([email protected])
+ */
+#include <linux/kexec.h>
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &elf_kexec_ops,
+ NULL
+};
--
2.17.1

2021-10-30 03:29:47

by Liao, Chang

[permalink] [raw]
Subject: [PATCH 1/3] kexec_file: Fix kexec_file.c build error for riscv platform

When CONFIG_KEXEC_FILE is set for riscv platform, the compilation of
kernel/kexec_file.c generate build error:

kernel/kexec_file.c: In function 'crash_prepare_elf64_headers':
./arch/riscv/include/asm/page.h:110:71: error: request for member 'virt_addr' in something not a structure or union
110 | ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr))
| ^
./arch/riscv/include/asm/page.h:131:2: note: in expansion of macro 'is_linear_mapping'
131 | is_linear_mapping(_x) ? \
| ^~~~~~~~~~~~~~~~~
./arch/riscv/include/asm/page.h:140:31: note: in expansion of macro '__va_to_pa_nodebug'
140 | #define __phys_addr_symbol(x) __va_to_pa_nodebug(x)
| ^~~~~~~~~~~~~~~~~~
./arch/riscv/include/asm/page.h:143:24: note: in expansion of macro '__phys_addr_symbol'
143 | #define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
| ^~~~~~~~~~~~~~~~~~
kernel/kexec_file.c:1327:36: note: in expansion of macro '__pa_symbol'
1327 | phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);

This occurs is because the "kernel_map" referenced in macro
is_linear_mapping() is suppose to be the one of struct kernel_mapping
defined in arch/riscv/mm/init.c, but the 2nd argument of
crash_prepare_elf64_header() has same symbol name, in expansion of macro
is_linear_mapping in function crash_prepare_elf64_header(), "kernel_map"
actually is the local variable.

Signed-off-by: Liao Chang <[email protected]>
---
include/linux/kexec.h | 2 +-
kernel/kexec_file.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0c994ae37729..25b91cca8077 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -221,7 +221,7 @@ struct crash_mem {
extern int crash_exclude_mem_range(struct crash_mem *mem,
unsigned long long mstart,
unsigned long long mend);
-extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
+extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
void **addr, unsigned long *sz);
#endif /* CONFIG_KEXEC_FILE */

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 33400ff051a8..0837d8c0e792 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -1255,7 +1255,7 @@ int crash_exclude_mem_range(struct crash_mem *mem,
return 0;
}

-int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
+int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
void **addr, unsigned long *sz)
{
Elf64_Ehdr *ehdr;
@@ -1319,7 +1319,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
phdr++;

/* Prepare PT_LOAD type program header for kernel text region */
- if (kernel_map) {
+ if (need_kernel_map) {
phdr->p_type = PT_LOAD;
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_vaddr = (unsigned long) _text;
--
2.17.1