Category: feature
Bugzilla: NA
CVE: NA
Use reserved memory to create a pmem device to store the
processes information that dumped before kernel update.
When you want to use this feature you need to declare by
"pmemmem=pmem_size:pmem_phystart" in cmdline.
(exp: pmemmem=100M:0x202000000000)
Signed-off-by: Zhuling <[email protected]>
---
arch/arm64/kernel/setup.c | 5 +++
arch/arm64/mm/init.c | 90 +++++++++++++++++++++++++++++++++++++++
drivers/nvdimm/Kconfig | 11 +++++
drivers/nvdimm/Makefile | 3 ++
drivers/nvdimm/kup_pmem.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/ioport.h | 1 +
include/linux/mm.h | 4 ++
lib/Kconfig | 6 +++
8 files changed, 226 insertions(+)
create mode 100644 drivers/nvdimm/kup_pmem.c
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 133257f..0bd9429 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -237,6 +237,11 @@ static void __init request_standard_resources(void)
if (kernel_data.start >= res->start &&
kernel_data.end <= res->end)
request_resource(res, &kernel_data);
+#ifdef CONFIG_KUP_PMEM_MEMORY
+ if (pmem_res.end)
+ insert_resource(&iomem_resource, &pmem_res);
+#endif
+
#ifdef CONFIG_KEXEC_CORE
/* Userspace will find "Crash kernel" region in /proc/iomem. */
if (crashk_res.end && crashk_res.start >= res->start &&
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 0955406..9d1395e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -62,6 +62,18 @@ EXPORT_SYMBOL(memstart_addr);
phys_addr_t arm64_dma_phys_limit __ro_after_init;
static phys_addr_t arm64_dma32_phys_limit __ro_after_init;
+#ifdef CONFIG_KUP_PMEM_MEMORY
+static unsigned long long pmem_size, pmem_phystart;
+
+struct resource pmem_res = {
+ .name = "Kpmem Dev",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_MEM,
+ .desc = IORES_DESC_KPMEM_DEV
+};
+#endif
+
#ifdef CONFIG_KEXEC_CORE
/*
* reserve_crashkernel() - reserves memory for crash kernel
@@ -123,6 +135,80 @@ static void __init reserve_crashkernel(void)
}
#endif /* CONFIG_KEXEC_CORE */
+#ifdef CONFIG_KUP_PMEM_MEMORY
+/*
+ * reserve_pmem() - reserves memory for pmem
+ *
+ * This function reserves memory area given in "pmemmem=" kernel command
+ * line parameter. The memory reserved is used by pmem restore progress
+ * when kernel update.
+ */
+static int __init parse_pmem(char *par)
+{
+ char *cur = par;
+
+ if (!par)
+ return 0;
+
+ pmem_size = 0;
+ pmem_phystart = 0;
+
+ pmem_size = memparse(par, &cur);
+ if (par == cur) {
+ pr_warn("pmem: memory value expected\n");
+ return -EINVAL;
+ }
+
+ if (*cur == ':')
+ pmem_phystart = memparse(cur+1, &cur);
+ else if (*cur != ' ' && *cur != '\0') {
+ pr_warn("pmem: unrecognized char %c\n", *cur);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+early_param("pmemmem", parse_pmem);
+
+static void __init reserve_pmem(void)
+{
+ if (!pmem_size || !pmem_phystart) {
+ return;
+ }
+
+ pmem_size = PAGE_ALIGN(pmem_size);
+
+ if (!memblock_is_region_memory(pmem_phystart, pmem_size)) {
+ pr_warn("cannot reserve pmem: region is not memory!\n");
+ return;
+ }
+
+ if (memblock_is_region_reserved(pmem_phystart, pmem_size)) {
+ pr_warn("cannot reserve pmem: region overlaps reserved memory!\n");
+ return;
+ }
+
+ if (!IS_ALIGNED(pmem_phystart, SZ_2M)) {
+ pr_warn("cannot reserve pmem: base address is not 2MB aligned\n");
+ return;
+ }
+ memblock_reserve(pmem_phystart, pmem_size);
+ memblock_remove(pmem_phystart, pmem_size);
+ pr_info("pmem reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+ pmem_phystart, pmem_phystart + pmem_size, pmem_size >> 20);
+
+ pmem_res.start = pmem_phystart;
+ pmem_res.end = pmem_phystart + pmem_size - 1;
+}
+#else
+static void __init reserve_pmem(void)
+{
+}
+static void __init reserve_pmem_pages(void)
+{
+}
+#endif /*CONFIG_KUP_PMEM_MEMORY*/
+
#ifdef CONFIG_CRASH_DUMP
static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
const char *uname, int depth, void *data)
@@ -390,6 +476,10 @@ void __init arm64_memblock_init(void)
reserve_elfcorehdr();
+#ifdef CONFIG_KUP_PMEM_MEMORY
+ reserve_pmem();
+#endif
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
dma_contiguous_reserve(arm64_dma32_phys_limit);
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index b7d1eb3..7f5fa22 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -119,6 +119,17 @@ config NVDIMM_KEYS
depends on ENCRYPTED_KEYS
depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m
+config KUP_PMEM
+ tristate "Persistent memory for kernel update"
+ depends on LIBNVDIMM
+ depends on KUP_PMEM_MEMORY
+ default LIBNVDIMM
+ help
+ Allows regions of persistent memory to be described in the
+ device-tree.
+
+ Select Y if unsure.
+
config NVDIMM_TEST_BUILD
tristate "Build the unit test core"
depends on m
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 29203f3..39fabc3 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_OF_PMEM) += of_pmem.o
obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
+obj-$(CONFIG_KUP_PMEM) += nd_kup_pmem.o
nd_pmem-y := pmem.o
@@ -15,6 +16,8 @@ nd_blk-y := blk.o
nd_e820-y := e820.o
+nd_kup_pmem-y := kup_pmem.o
+
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
diff --git a/drivers/nvdimm/kup_pmem.c b/drivers/nvdimm/kup_pmem.c
new file mode 100644
index 0000000..eadf95a
--- /dev/null
+++ b/drivers/nvdimm/kup_pmem.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ *
+ * kup_pmem.c - kernel update support code.
+ * create a pmem device to store the processes information that is dumped
+ * when we want to kernel update.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/memory_hotplug.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <asm/io.h>
+
+static const struct attribute_group *kup_pmem_attribute_groups[] = {
+ &nvdimm_bus_attribute_group,
+ NULL,
+};
+
+static const struct attribute_group *kup_pmem_region_attribute_groups[] = {
+ &nd_region_attribute_group,
+ &nd_device_attribute_group,
+ NULL,
+};
+
+static int kup_pmem_remove(struct platform_device *pdev)
+{
+ struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev);
+
+ nvdimm_bus_unregister(nvdimm_bus);
+
+ return 0;
+}
+
+static int kup_register_one(struct resource *res, void *data)
+{
+ struct nd_region_desc ndr_desc;
+ struct nvdimm_bus *nvdimm_bus = data;
+
+ memset(&ndr_desc, 0, sizeof(ndr_desc));
+ ndr_desc.res = res;
+ ndr_desc.attr_groups = kup_pmem_region_attribute_groups;
+ ndr_desc.numa_node = NUMA_NO_NODE;
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+ if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+ return -ENXIO;
+ return 0;
+}
+
+static int kup_pmem_probe(struct platform_device *pdev)
+{
+ static struct nvdimm_bus_descriptor nd_desc;
+ struct device *dev = &pdev->dev;
+ struct nvdimm_bus *nvdimm_bus;
+ int rc = -ENXIO;
+
+ nd_desc.attr_groups = kup_pmem_attribute_groups;
+ nd_desc.provider_name = "kup_pmem";
+ nd_desc.module = THIS_MODULE;
+ nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
+ if (!nvdimm_bus)
+ goto err;
+ platform_set_drvdata(pdev, nvdimm_bus);
+
+ rc = walk_iomem_res_desc(IORES_DESC_KPMEM_DEV,
+ IORESOURCE_MEM, 0, -1, nvdimm_bus, kup_register_one);
+ if (rc)
+ goto err;
+
+ return 0;
+err:
+ nvdimm_bus_unregister(nvdimm_bus);
+ dev_err(dev, "kup_pmem: failed to register legacy persistent memory ranges\n");
+ return rc;
+}
+
+static struct platform_driver kup_pmem_driver = {
+ .probe = kup_pmem_probe,
+ .remove = kup_pmem_remove,
+ .driver = {
+ .name = "kup_pmem",
+ },
+};
+static struct platform_device *pdev;
+
+static __init int register_kup_pmem(void)
+{
+ platform_driver_register(&kup_pmem_driver);
+ pdev = platform_device_alloc("kup_pmem", -1);
+
+ return platform_device_add(pdev);
+}
+
+static __exit void unregister_kup_pmem(void)
+{
+ platform_device_del(pdev);
+ platform_driver_unregister(&kup_pmem_driver);
+}
+
+module_init(register_kup_pmem);
+module_exit(unregister_kup_pmem);
+MODULE_ALIAS("platform:kup_pmem*");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5135d4b..bba36f2 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -139,6 +139,7 @@ enum {
IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
IORES_DESC_RESERVED = 7,
IORES_DESC_SOFT_RESERVED = 8,
+ IORES_DESC_KPMEM_DEV = 9,
};
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d..2b2a94c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -45,6 +45,10 @@ extern int sysctl_page_lock_unfairness;
void init_mm_internals(void);
+#ifdef CONFIG_KUP_PMEM_MEMORY
+extern struct resource pmem_res;
+#endif
+
#ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
diff --git a/lib/Kconfig b/lib/Kconfig
index b46a9fd..ff5677c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -689,3 +689,9 @@ config GENERIC_LIB_UCMPDI2
config PLDMFW
bool
default n
+
+config KUP_PMEM_MEMORY
+ bool "reserve memory for kup pmem to store image"
+ default y
+ help
+ Say y here to enable this feature
--
2.9.5
Hi Zhuling,
On Wed, Dec 30, 2020 at 10:18 PM Zhuling <[email protected]> wrote:
>
> Category: feature
> Bugzilla: NA
> CVE: NA
These tags can be dropped.
>
> Use reserved memory to create a pmem device to store the
> processes information that dumped before kernel update.
> When you want to use this feature you need to declare by
> "pmemmem=pmem_size:pmem_phystart" in cmdline.
> (exp: pmemmem=100M:0x202000000000)
>
Interesting. I like the feature, but it's not clear to me that a new
command line based configuration scheme is needed. There is the
existing memmap= parameter that on x86 describes a
IORES_DESC_PERSISTENT_MEMORY_LEGACY range. The driver/nvdimm/e820.c
driver could be reworked to attach to the same thing on ARM64.
Then as far as assigning memory to different kernel usages there is
the existing capability in libnvdimm to attach a "personality" to an
nvdimm namespace. I imagine you could write a special signature to the
namespace that libnvdimm would recognize as a KUP reservation
namespace and work generically across any arch.