Add an API to the vmalloc infrastructure, create_vmalloc_range_check,
which allows for the creation of restricted sub-ranges of vmalloc memory
during the init process, which can only be allocated from via vmalloc
requests with vaddr start addresses explicitly matching the range's
start addresses. Calls to this API can split up to two nodes in the
red-black tree.
create_vmalloc_range_check restricts vmalloc requests not matching the
range's start address to all other locations in the standard vmalloc
range, i.e. users of the interface are responsible for requesting only
correct and appropriate reservations. The primary intention of this API
is supporting ASLR module region allocation regions while not
undermining existing security mechanisms by necessitating interleaved
code and data pages.
To perform range allocation at the appropriate, earliest time, provide a
callback arch_init_checked_vmap_ranges rather than maintaining a linked
list outside of the vmalloc infrastructure, ensuring all vmap management
is still owned by vmalloc.c.
Considering some alternatives, i.e. a large change to the vmalloc
infrastructure to provide true support for a module code dynamic
allocation region, this smaller vstart-based opt-in seems preferable.
These changes may need to wait/be rebased on Mike Rapoport's patches
at [email protected] , but this version is
submitted to account for the justification for unrestricted BPF/kprobe
code allocations and squashes some bugs the last version created in the
bpf selftests.
Changes from v4:
[email protected]
- Fix the corruption because of backslash created by SMTP mailer
- Add config to permit the reduction of BPF memory to 128MB, i.e. fix
issue with the arm64 side of this change implicitly breaking
[email protected] "128MB of
JIT memory can be quickly exhausted"
- Expand modules_alloc region used on arm64 to support larger BPF
allocations present in the selftests.
Changes from v3:
[email protected]
- Added callbacks into arch-specific code to dynamically partition
red-black tree
(The freedom of architectures to determine vm area allocation was deemed
dangerous since there was no possibility of enforcing that areas were
correctly managed.)
Changes from v2:
[email protected]
- No longer depends on reducing the size of the vmalloc region
- Attempted to implement change by allowing architectures to override
most abstract public vmalloc interface
(Overrides on vmalloc methods were deemed undesirable.)
Changes from v1:
CAP5Mv+ydhk=Ob4b40ZahGMgT-5+-VEHxtmA=-LkJiEOOU+K6hw@mail.gmail.com
- Statically reduced the range of the vmalloc region to support
parititoned code ranges
(The trade off between space reduction and security was deemed
unnecessary.)
Signed-off-by: Maxwell Bland <[email protected]>
---
Thanks again to the maintainers for their review, apologies for the
mailer and BPF selftest errors on the previous version. This version
will still be incompatible with BPF allocation limit stress tests, for
clear reasons. I plan to rebase this same exact code on top of Mike
Rapoport's recent patchset, but this version is compatible with the
current linus upstream.
include/linux/vmalloc.h | 14 ++++++
mm/vmalloc.c | 102 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 113 insertions(+), 3 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 98ea90e90439..ece8879ab060 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -81,6 +81,12 @@ struct vmap_area {
unsigned long flags; /* mark type of vm_map_ram area */
};
+struct checked_vmap_range {
+ unsigned long va_start;
+ unsigned long va_end;
+ struct list_head list;
+};
+
/* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */
#ifndef arch_vmap_p4d_supported
static inline bool arch_vmap_p4d_supported(pgprot_t prot)
@@ -125,6 +131,12 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
}
#endif
+#ifndef arch_init_checked_vmap_ranges
+inline void __init arch_init_checked_vmap_ranges(void)
+{
+}
+#endif
+
/*
* Highlevel APIs for driver use
*/
@@ -211,6 +223,8 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
unsigned long flags,
unsigned long start, unsigned long end,
const void *caller);
+int __init create_vmalloc_range_check(unsigned long start_vaddr,
+ unsigned long end_vaddr);
void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 68fa001648cc..8f382b6c31de 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -817,6 +817,16 @@ static struct kmem_cache *vmap_area_cachep;
*/
static LIST_HEAD(free_vmap_area_list);
+static struct kmem_cache *vmap_checked_range_cachep;
+
+/*
+ * This linked list is used to record ranges of the vmalloc
+ * region which are checked at allocation time to ensure they
+ * are only allocated within when an explicit allocation
+ * request to that range is made.
+ */
+static LIST_HEAD(checked_range_list);
+
/*
* This augment red-black tree represents the free vmap space.
* All vmap_area objects in this tree are sorted by va->va_start
@@ -1454,6 +1464,23 @@ merge_or_add_vmap_area_augment(struct vmap_area *va,
return va;
}
+static __always_inline bool
+va_is_range_restricted(struct vmap_area *va, unsigned long vstart)
+{
+ struct checked_vmap_range *range, *tmp;
+
+ if (list_empty(&checked_range_list))
+ return false;
+
+ list_for_each_entry_safe(range, tmp, &checked_range_list, list)
+ if (va->va_start >= range->va_start &&
+ va->va_end <= range->va_end &&
+ vstart != range->va_start)
+ return true;
+
+ return false;
+}
+
static __always_inline bool
is_within_this_va(struct vmap_area *va, unsigned long size,
unsigned long align, unsigned long vstart)
@@ -1501,7 +1528,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
vstart < va->va_start) {
node = node->rb_left;
} else {
- if (is_within_this_va(va, size, align, vstart))
+ if (!va_is_range_restricted(va, vstart) &&
+ is_within_this_va(va, size, align, vstart))
return va;
/*
@@ -1522,7 +1550,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
*/
while ((node = rb_parent(node))) {
va = rb_entry(node, struct vmap_area, rb_node);
- if (is_within_this_va(va, size, align, vstart))
+ if (!va_is_range_restricted(va, vstart) &&
+ is_within_this_va(va, size, align, vstart))
return va;
if (get_subtree_max_size(node->rb_right) >= length &&
@@ -1554,7 +1583,8 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
struct vmap_area *va;
list_for_each_entry(va, head, list) {
- if (!is_within_this_va(va, size, align, vstart))
+ if (va_is_range_restricted(va, vstart) ||
+ !is_within_this_va(va, size, align, vstart))
continue;
return va;
@@ -1717,6 +1747,36 @@ va_clip(struct rb_root *root, struct list_head *head,
return 0;
}
+static inline int
+split_and_alloc_va(struct rb_root *root, struct list_head *head, unsigned long addr)
+{
+ struct vmap_area *va;
+ int ret;
+ struct vmap_area *lva = NULL;
+
+ va = __find_vmap_area(addr, root);
+ if (!va) {
+ pr_err("%s: could not find vmap\n", __func__);
+ return -1;
+ }
+
+ lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
+ if (!lva) {
+ pr_err("%s: unable to allocate va for range\n", __func__);
+ return -1;
+ }
+ lva->va_start = addr;
+ lva->va_end = va->va_end;
+ ret = va_clip(root, head, va, addr, va->va_end - addr);
+ if (WARN_ON_ONCE(ret)) {
+ pr_err("%s: unable to clip code base region\n", __func__);
+ kmem_cache_free(vmap_area_cachep, lva);
+ return -1;
+ }
+ insert_vmap_area_augment(lva, NULL, root, head);
+ return 0;
+}
+
static unsigned long
va_alloc(struct vmap_area *va,
struct rb_root *root, struct list_head *head,
@@ -4424,6 +4484,35 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
+/**
+ * create_vmalloc_range_check - create a checked range of vmalloc memory
+ * @start_vaddr: The starting vaddr of the code range
+ * @end_vaddr: The ending vaddr of the code range
+ *
+ * Returns: 0 for success, -1 on failure
+ *
+ * This function marks regions within or overlapping the vmalloc region for
+ * requested range checking during allocation. When requesting virtual memory,
+ * if the requested starting vaddr does not explicitly match the starting vaddr
+ * of this range, this range will not be allocated from.
+ */
+int __init create_vmalloc_range_check(unsigned long start_vaddr,
+ unsigned long end_vaddr)
+{
+ struct checked_vmap_range *range;
+
+ range = kmem_cache_alloc(vmap_checked_range_cachep, GFP_NOWAIT);
+ if (split_and_alloc_va(&free_vmap_area_root, &free_vmap_area_list, start_vaddr) ||
+ split_and_alloc_va(&free_vmap_area_root, &free_vmap_area_list, end_vaddr))
+ return -1;
+
+ range->va_start = start_vaddr;
+ range->va_end = end_vaddr;
+
+ list_add(&range->list, &checked_range_list);
+ return 0;
+}
+
void free_vm_area(struct vm_struct *area)
{
struct vm_struct *ret;
@@ -5082,6 +5171,11 @@ void __init vmalloc_init(void)
*/
vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
+ /*
+ * Create the cache for checked vmap ranges.
+ */
+ vmap_checked_range_cachep = KMEM_CACHE(checked_vmap_range, SLAB_PANIC);
+
for_each_possible_cpu(i) {
struct vmap_block_queue *vbq;
struct vfree_deferred *p;
@@ -5129,4 +5223,6 @@ void __init vmalloc_init(void)
vmap_node_shrinker->count_objects = vmap_node_shrink_count;
vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
shrinker_register(vmap_node_shrinker);
+
+ arch_init_checked_vmap_ranges();
}
base-commit: ee5b455b0adae9ecafb38b174c648c48f2a3c1a5
--
2.34.1
Introduce a new Kconfig ARM64_FORCE_CODE_PARTITIONING which uses new
vmalloc infrastructure to prevent interleaving code and data pages,
working to both maintain compatible management assumptions made by
non-arch-specific code and make management of these regions more
precise.
For now, it restricts the additional BPF allocations to +SZ_2G beyond
the modules region. This is assumed to be more than enough for cases
where enabling this config are acceptable.
This will allow, for example, the maintenance of PXNTable bits on
dynamically allocated memory or the immutability of certain page middle
directory and higher level descriptors.
For this purpose, move module_init_limits to setup.c, a more appropriate
place since it is an initialization routine, and as a result, move
module_plt_base and module_direct_base to module.h and provide
appropriate "getter" methods.
The two existing code allocation calls for BPF and kprobes now check
the CONFIG option on whether to allocate from the modules memory region,
ensuring they no longer pollute data memory. This ensures regression
compatibility with the decision to loosen allocation restrictions for
VM support from [email protected]
This will make code ensuring the self-patching interface cannot be used
to modify data and data interfaces cannot be used to modify code more
performant.
Add in a mm/vmalloc.c file to perform the appropriate vmalloc_init
callbacks required to ensure segmentation of the virtual memory space.
Signed-off-by: Maxwell Bland <[email protected]>
---
arch/arm64/Kconfig | 16 ++++++
arch/arm64/include/asm/module.h | 7 +++
arch/arm64/include/asm/setup.h | 2 +
arch/arm64/include/asm/vmalloc.h | 3 ++
arch/arm64/kernel/module.c | 85 +++---------------------------
arch/arm64/kernel/probes/kprobes.c | 13 +++--
arch/arm64/kernel/setup.c | 82 ++++++++++++++++++++++++++++
arch/arm64/mm/Makefile | 3 +-
arch/arm64/mm/vmalloc.c | 12 +++++
arch/arm64/net/bpf_jit_comp.c | 20 ++++++-
10 files changed, 160 insertions(+), 83 deletions(-)
create mode 100644 arch/arm64/mm/vmalloc.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84..8986e3133396 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1321,6 +1321,22 @@ config ARM64_VA_BITS_52
endchoice
+config ARM64_FORCE_CODE_PARTITIONING
+ bool "Force the partitioning of code from vmalloc space"
+ default n
+ help
+ Restricts the allocation of BPF programs and kprobe instruction
+ pages such that they do not overlap with vmalloc region memory
+ and are contained within module code region defined by
+ module_init_limits + SZ_2G. Ensures other vmalloc data pages
+ cannot be allocated within this region.
+
+ This configuration option is incompatible with BPF and kprobe
+ deployments which require more than 128MB (or 2GB, depending on PLT
+ support) of memory, such as when hosting multiple VMs. Enabling this
+ option ensures granular segregation of code from data pages in
+ ARM64 architecture code. If unsure say N here.
+
config ARM64_FORCE_52BIT
bool "Force 52-bit virtual addresses for userspace"
depends on ARM64_VA_BITS_52 && EXPERT
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 79550b22ba19..c7a6497e6d5d 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -65,4 +65,11 @@ static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
return NULL;
}
+extern u64 module_direct_base __ro_after_init;
+extern u64 module_plt_base __ro_after_init;
+
+inline u64 get_modules_base(void);
+inline u64 get_modules_end(void);
+void *module_alloc(unsigned long size);
+
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
index ba269a7a3201..4d1b668effc9 100644
--- a/arch/arm64/include/asm/setup.h
+++ b/arch/arm64/include/asm/setup.h
@@ -41,4 +41,6 @@ static inline bool arch_parse_debug_rodata(char *arg)
}
#define arch_parse_debug_rodata arch_parse_debug_rodata
+int __init module_init_limits(void);
+
#endif
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..7d45c7d5b758 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -31,4 +31,7 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
return pgprot_tagged(prot);
}
+#define arch_init_checked_vmap_ranges arch_init_checked_vmap_ranges
+inline void __init arch_init_checked_vmap_ranges(void);
+
#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..c64695f33d09 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -25,90 +25,21 @@
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
+#include <asm/module.h>
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
+u64 module_direct_base __ro_after_init;
+u64 module_plt_base __ro_after_init;
-/*
- * Choose a random page-aligned base address for a window of 'size' bytes which
- * entirely contains the interval [start, end - 1].
- */
-static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
+inline u64 get_modules_base(void)
{
- u64 max_pgoff, pgoff;
-
- if ((end - start) >= size)
- return 0;
-
- max_pgoff = (size - (end - start)) / PAGE_SIZE;
- pgoff = get_random_u32_inclusive(0, max_pgoff);
-
- return start - pgoff * PAGE_SIZE;
+ return (module_plt_base) ? module_plt_base : module_direct_base;
}
-/*
- * Modules may directly reference data and text anywhere within the kernel
- * image and other modules. References using PREL32 relocations have a +/-2G
- * range, and so we need to ensure that the entire kernel image and all modules
- * fall within a 2G window such that these are always within range.
- *
- * Modules may directly branch to functions and code within the kernel text,
- * and to functions and code within other modules. These branches will use
- * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
- * that the entire kernel text and all module text falls within a 128M window
- * such that these are always within range. With PLTs, we can expand this to a
- * 2G window.
- *
- * We chose the 128M region to surround the entire kernel image (rather than
- * just the text) as using the same bounds for the 128M and 2G regions ensures
- * by construction that we never select a 128M region that is not a subset of
- * the 2G region. For very large and unusual kernel configurations this means
- * we may fall back to PLTs where they could have been avoided, but this keeps
- * the logic significantly simpler.
- */
-static int __init module_init_limits(void)
+inline u64 get_modules_end(void)
{
- u64 kernel_end = (u64)_end;
- u64 kernel_start = (u64)_text;
- u64 kernel_size = kernel_end - kernel_start;
-
- /*
- * The default modules region is placed immediately below the kernel
- * image, and is large enough to use the full 2G relocation range.
- */
- BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
- BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
-
- if (!kaslr_enabled()) {
- if (kernel_size < SZ_128M)
- module_direct_base = kernel_end - SZ_128M;
- if (kernel_size < SZ_2G)
- module_plt_base = kernel_end - SZ_2G;
- } else {
- u64 min = kernel_start;
- u64 max = kernel_end;
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
- } else {
- module_direct_base = random_bounding_box(SZ_128M, min, max);
- if (module_direct_base) {
- min = module_direct_base;
- max = module_direct_base + SZ_128M;
- }
- }
-
- module_plt_base = random_bounding_box(SZ_2G, min, max);
- }
-
- pr_info("%llu pages in range for non-PLT usage",
- module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
- pr_info("%llu pages in range for PLT usage",
- module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
-
- return 0;
+ return (module_plt_base) ? module_plt_base + SZ_2G :
+ module_direct_base + SZ_128M;
}
-subsys_initcall(module_init_limits);
void *module_alloc(unsigned long size)
{
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..e612419f695d 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -131,9 +131,16 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
void *alloc_insn_page(void)
{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
+ if (IS_ENABLED(CONFIG_ARCH_FORCE_CODE_PARTITIONING))
+ return __vmalloc_node_range(PAGE_SIZE, 1, get_modules_base(),
+ get_modules_end() + SZ_2G, GFP_KERNEL,
+ PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ else
+ return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
+ VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ __builtin_return_address(0));
}
/* arm kprobe: install breakpoint in text */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 65a052bf741f..ad0712d4d682 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -268,6 +268,86 @@ static int __init reserve_memblock_reserved_regions(void)
}
arch_initcall(reserve_memblock_reserved_regions);
+/*
+ * Choose a random page-aligned base address for a window of 'size' bytes which
+ * entirely contains the interval [start, end - 1].
+ */
+static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
+{
+ u64 max_pgoff, pgoff;
+
+ if ((end - start) >= size)
+ return 0;
+
+ max_pgoff = (size - (end - start)) / PAGE_SIZE;
+ pgoff = get_random_u32_inclusive(0, max_pgoff);
+
+ return start - pgoff * PAGE_SIZE;
+}
+
+/*
+ * Modules may directly reference data and text anywhere within the kernel
+ * image and other modules. References using PREL32 relocations have a +/-2G
+ * range, and so we need to ensure that the entire kernel image and all modules
+ * fall within a 2G window such that these are always within range.
+ *
+ * Modules may directly branch to functions and code within the kernel text,
+ * and to functions and code within other modules. These branches will use
+ * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
+ * that the entire kernel text and all module text falls within a 128M window
+ * such that these are always within range. With PLTs, we can expand this to a
+ * 2G window.
+ *
+ * We chose the 128M region to surround the entire kernel image (rather than
+ * just the text) as using the same bounds for the 128M and 2G regions ensures
+ * by construction that we never select a 128M region that is not a subset of
+ * the 2G region. For very large and unusual kernel configurations this means
+ * we may fall back to PLTs where they could have been avoided, but this keeps
+ * the logic significantly simpler.
+ */
+int __init module_init_limits(void)
+{
+ u64 kernel_end = (u64)_end;
+ u64 kernel_start = (u64)_text;
+ u64 kernel_size = kernel_end - kernel_start;
+
+ /*
+ * The default modules region is placed immediately below the kernel
+ * image, and is large enough to use the full 2G relocation range.
+ */
+ BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
+ BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
+
+ if (!kaslr_enabled()) {
+ if (kernel_size < SZ_128M)
+ module_direct_base = kernel_end - SZ_128M;
+ if (kernel_size < SZ_2G)
+ module_plt_base = kernel_end - SZ_2G;
+ } else {
+ u64 min = kernel_start;
+ u64 max = kernel_end;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
+ } else {
+ module_direct_base = random_bounding_box(SZ_128M, min, max);
+ if (module_direct_base) {
+ min = module_direct_base;
+ max = module_direct_base + SZ_128M;
+ }
+ }
+
+ module_plt_base = random_bounding_box(SZ_2G, min, max);
+ }
+
+ pr_info("%llu pages in range for non-PLT usage",
+ module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
+ pr_info("%llu pages in range for PLT usage",
+ module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
+
+ return 0;
+}
+
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
u64 cpu_logical_map(unsigned int cpu)
@@ -366,6 +446,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
"This indicates a broken bootloader or old kernel\n",
boot_args[1], boot_args[2], boot_args[3]);
}
+
+ module_init_limits();
}
static inline bool cpu_can_disable(unsigned int cpu)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 60454256945b..6d164f5852c1 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -2,7 +2,8 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o proc.o pageattr.o fixmap.o
+ context.o proc.o pageattr.o fixmap.o \
+ vmalloc.o
obj-$(CONFIG_ARM64_CONTPTE) += contpte.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
diff --git a/arch/arm64/mm/vmalloc.c b/arch/arm64/mm/vmalloc.c
new file mode 100644
index 000000000000..59383f0b6ab2
--- /dev/null
+++ b/arch/arm64/mm/vmalloc.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/vmalloc.h>
+#include <linux/elf.h>
+
+#include <asm/module.h>
+
+inline void __init arch_init_checked_vmap_ranges(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_FORCE_CODE_PARTITIONING))
+ create_vmalloc_range_check(get_modules_base(),
+ get_modules_end() + SZ_2G);
+}
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 13eac43c632d..6a68bb37b9ff 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -13,6 +13,8 @@
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/moduleloader.h>
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
@@ -1790,13 +1792,27 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
u64 bpf_jit_alloc_exec_limit(void)
{
- return VMALLOC_END - VMALLOC_START;
+ if (IS_ENABLED(CONFIG_ARM64_FORCE_CODE_PARTITIONING))
+ return get_modules_end() - get_modules_base() + SZ_2G;
+ else
+ return VMALLOC_END - VMALLOC_START;
}
void *bpf_jit_alloc_exec(unsigned long size)
{
+ void *p = NULL;
+
+ if (IS_ENABLED(CONFIG_ARM64_FORCE_CODE_PARTITIONING)) {
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
+ get_modules_base(), get_modules_end() + SZ_2G,
+ GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ } else {
+ p = vmalloc(size);
+ }
+
/* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(vmalloc(size));
+ return kasan_reset_tag(p);
}
void bpf_jit_free_exec(void *addr)
--
2.34.1