pkvm_hyp_alloc_private_va_range() can be used to reserve private VA ranges
in the pKVM nVHE hypervisor. Allocations are aligned based on the order of
the requested size.
This will be used to implement stack guard pages for pKVM nVHE hypervisor
(in a subsequent patch in the series).
Credits to Quentin Perret <[email protected]> for the idea of moving
private VA allocation out of __pkvm_create_private_mapping()
Signed-off-by: Kalesh Singh <[email protected]>
Tested-by: Fuad Tabba <[email protected]>
Reviewed-by: Fuad Tabba <[email protected]>
---
Changes in v7:
- Add Fuad's Reviewed-by and Tested-by tags.
Changes in v6:
- Update kernel-doc for pkvm_alloc_private_va_range() and add
return description, per Stephen
- Update pkvm_alloc_private_va_range() to return an int error code,
per Stephen
- Update __pkvm_create_private_mapping to return an in error code,
per Quentin
- Update callers of __pkvm_create_private_mapping() to handle new
return value and params.
Changes in v5:
- Align private allocations based on the order of their size, per Marc
Changes in v4:
- Handle null ptr in pkvm_alloc_private_va_range() and replace
IS_ERR_OR_NULL checks in callers with IS_ERR checks, per Fuad
- Fix kernel-doc comments format, per Fuad
- Format __pkvm_create_private_mapping() prototype args (< 80 col), per Fuad
Changes in v3:
- Handle null ptr in IS_ERR_OR_NULL checks, per Mark
Changes in v2:
- Allow specifying an alignment for the private VA allocations, per Marc
arch/arm64/kvm/hyp/include/nvhe/mm.h | 6 ++-
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 18 ++++++-
arch/arm64/kvm/hyp/nvhe/mm.c | 78 ++++++++++++++++++----------
3 files changed, 72 insertions(+), 30 deletions(-)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 2d08510c6cc1..42d8eb9bfe72 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -19,8 +19,10 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot);
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr);
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
unsigned long *start, unsigned long *end)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 5e2197db0d32..3cea4b6ac23e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct
DECLARE_REG(size_t, size, host_ctxt, 2);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
- cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
+ /*
+ * __pkvm_create_private_mapping() populates a pointer with the
+ * hypervisor start address of the allocation.
+ *
+ * However, handle___pkvm_create_private_mapping() hypercall crosses the
+ * EL1/EL2 boundary so the pointer would not be valid in this context.
+ *
+ * Instead pass the allocation address as the return value (or return
+ * ERR_PTR() on failure).
+ */
+ unsigned long haddr;
+ int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
+
+ if (err)
+ haddr = (unsigned long)ERR_PTR(err);
+
+ cpu_reg(host_ctxt, 1) = haddr;
}
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index cdbe8e246418..670f11349070 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot)
+/**
+ * pkvm_alloc_private_va_range - Allocates a private VA range.
+ * @size: The size of the VA range to reserve.
+ * @haddr: The hypervisor virtual start address of the allocation.
+ *
+ * The private virtual address (VA) range is allocated above __io_map_base
+ * and aligned based on the order of @size.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long addr;
- int err;
+ unsigned long base, addr;
+ int ret = 0;
hyp_spin_lock(&pkvm_pgd_lock);
- size = PAGE_ALIGN(size + offset_in_page(phys));
- addr = __io_map_base;
- __io_map_base += size;
+ /* Align the allocation based on the order of its size */
+ addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
- /* Are we overflowing on the vmemmap ? */
- if (__io_map_base > __hyp_vmemmap) {
- __io_map_base -= size;
- addr = (unsigned long)ERR_PTR(-ENOMEM);
- goto out;
- }
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ base = addr + PAGE_ALIGN(size);
- err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
- if (err) {
- addr = (unsigned long)ERR_PTR(err);
- goto out;
+ /* Are we overflowing on the vmemmap ? */
+ if (!addr || base > __hyp_vmemmap)
+ ret = -ENOMEM;
+ else {
+ __io_map_base = base;
+ *haddr = addr;
}
- addr = addr + offset_in_page(phys);
-out:
hyp_spin_unlock(&pkvm_pgd_lock);
- return addr;
+ return ret;
+}
+
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr)
+{
+ unsigned long addr;
+ int err;
+
+ size += offset_in_page(phys);
+ err = pkvm_alloc_private_va_range(size, &addr);
+ if (err)
+ return err;
+
+ err = __pkvm_create_mappings(addr, size, phys, prot);
+ if (err)
+ return err;
+
+ *haddr = addr + offset_in_page(phys);
+ return err;
}
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
@@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
int hyp_map_vectors(void)
{
phys_addr_t phys;
- void *bp_base;
+ unsigned long bp_base;
+ int ret;
if (!kvm_system_needs_idmapped_vectors()) {
__hyp_bp_vect_base = __bp_harden_hyp_vecs;
@@ -154,13 +179,12 @@ int hyp_map_vectors(void)
}
phys = __hyp_pa(__bp_harden_hyp_vecs);
- bp_base = (void *)__pkvm_create_private_mapping(phys,
- __BP_HARDEN_HYP_VECS_SZ,
- PAGE_HYP_EXEC);
- if (IS_ERR_OR_NULL(bp_base))
- return PTR_ERR(bp_base);
+ ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
+ PAGE_HYP_EXEC, &bp_base);
+ if (ret)
+ return ret;
- __hyp_bp_vect_base = bp_base;
+ __hyp_bp_vect_base = (void *)bp_base;
return 0;
}
--
2.35.1.1178.g4f1659d476-goog
On Fri, 08 Apr 2022 21:03:25 +0100,
Kalesh Singh <[email protected]> wrote:
>
> pkvm_hyp_alloc_private_va_range() can be used to reserve private VA ranges
> in the pKVM nVHE hypervisor. Allocations are aligned based on the order of
> the requested size.
>
> This will be used to implement stack guard pages for pKVM nVHE hypervisor
> (in a subsequent patch in the series).
>
> Credits to Quentin Perret <[email protected]> for the idea of moving
> private VA allocation out of __pkvm_create_private_mapping()
>
> Signed-off-by: Kalesh Singh <[email protected]>
> Tested-by: Fuad Tabba <[email protected]>
> Reviewed-by: Fuad Tabba <[email protected]>
> ---
>
> Changes in v7:
> - Add Fuad's Reviewed-by and Tested-by tags.
>
> Changes in v6:
> - Update kernel-doc for pkvm_alloc_private_va_range() and add
> return description, per Stephen
> - Update pkvm_alloc_private_va_range() to return an int error code,
> per Stephen
> - Update __pkvm_create_private_mapping to return an in error code,
> per Quentin
> - Update callers of __pkvm_create_private_mapping() to handle new
> return value and params.
>
> Changes in v5:
> - Align private allocations based on the order of their size, per Marc
>
> Changes in v4:
> - Handle null ptr in pkvm_alloc_private_va_range() and replace
> IS_ERR_OR_NULL checks in callers with IS_ERR checks, per Fuad
> - Fix kernel-doc comments format, per Fuad
> - Format __pkvm_create_private_mapping() prototype args (< 80 col), per Fuad
>
> Changes in v3:
> - Handle null ptr in IS_ERR_OR_NULL checks, per Mark
>
> Changes in v2:
> - Allow specifying an alignment for the private VA allocations, per Marc
>
>
> arch/arm64/kvm/hyp/include/nvhe/mm.h | 6 ++-
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 18 ++++++-
> arch/arm64/kvm/hyp/nvhe/mm.c | 78 ++++++++++++++++++----------
> 3 files changed, 72 insertions(+), 30 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
> index 2d08510c6cc1..42d8eb9bfe72 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
> @@ -19,8 +19,10 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
> int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
> int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
> int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
> -unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
> - enum kvm_pgtable_prot prot);
> +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
> + enum kvm_pgtable_prot prot,
> + unsigned long *haddr);
> +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
>
> static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
> unsigned long *start, unsigned long *end)
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index 5e2197db0d32..3cea4b6ac23e 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct
> DECLARE_REG(size_t, size, host_ctxt, 2);
> DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
>
> - cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
> + /*
> + * __pkvm_create_private_mapping() populates a pointer with the
> + * hypervisor start address of the allocation.
> + *
> + * However, handle___pkvm_create_private_mapping() hypercall crosses the
> + * EL1/EL2 boundary so the pointer would not be valid in this context.
> + *
> + * Instead pass the allocation address as the return value (or return
> + * ERR_PTR() on failure).
> + */
> + unsigned long haddr;
> + int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
> +
> + if (err)
> + haddr = (unsigned long)ERR_PTR(err);
> +
> + cpu_reg(host_ctxt, 1) = haddr;
> }
>
> static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
> diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
> index cdbe8e246418..670f11349070 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mm.c
> @@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
> return err;
> }
>
> -unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
> - enum kvm_pgtable_prot prot)
> +/**
> + * pkvm_alloc_private_va_range - Allocates a private VA range.
> + * @size: The size of the VA range to reserve.
> + * @haddr: The hypervisor virtual start address of the allocation.
> + *
> + * The private virtual address (VA) range is allocated above __io_map_base
> + * and aligned based on the order of @size.
> + *
> + * Return: 0 on success or negative error code on failure.
> + */
> +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
> {
> - unsigned long addr;
> - int err;
> + unsigned long base, addr;
> + int ret = 0;
>
> hyp_spin_lock(&pkvm_pgd_lock);
>
> - size = PAGE_ALIGN(size + offset_in_page(phys));
> - addr = __io_map_base;
> - __io_map_base += size;
> + /* Align the allocation based on the order of its size */
> + addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
>
> - /* Are we overflowing on the vmemmap ? */
> - if (__io_map_base > __hyp_vmemmap) {
> - __io_map_base -= size;
> - addr = (unsigned long)ERR_PTR(-ENOMEM);
> - goto out;
> - }
> + /* The allocated size is always a multiple of PAGE_SIZE */
> + base = addr + PAGE_ALIGN(size);
>
> - err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
> - if (err) {
> - addr = (unsigned long)ERR_PTR(err);
> - goto out;
> + /* Are we overflowing on the vmemmap ? */
> + if (!addr || base > __hyp_vmemmap)
> + ret = -ENOMEM;
> + else {
> + __io_map_base = base;
> + *haddr = addr;
> }
>
> - addr = addr + offset_in_page(phys);
> -out:
> hyp_spin_unlock(&pkvm_pgd_lock);
>
> - return addr;
> + return ret;
> +}
> +
> +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
> + enum kvm_pgtable_prot prot,
> + unsigned long *haddr)
> +{
> + unsigned long addr;
> + int err;
> +
> + size += offset_in_page(phys);
I have the same comment as for the previous patch. Keep the ALIGN()
here in order to make the code readable (it is just an add+and on a
slow path).
> + err = pkvm_alloc_private_va_range(size, &addr);
> + if (err)
> + return err;
> +
> + err = __pkvm_create_mappings(addr, size, phys, prot);
> + if (err)
> + return err;
> +
> + *haddr = addr + offset_in_page(phys);
> + return err;
> }
>
> int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
> @@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
> int hyp_map_vectors(void)
> {
> phys_addr_t phys;
> - void *bp_base;
> + unsigned long bp_base;
> + int ret;
>
> if (!kvm_system_needs_idmapped_vectors()) {
> __hyp_bp_vect_base = __bp_harden_hyp_vecs;
> @@ -154,13 +179,12 @@ int hyp_map_vectors(void)
> }
>
> phys = __hyp_pa(__bp_harden_hyp_vecs);
> - bp_base = (void *)__pkvm_create_private_mapping(phys,
> - __BP_HARDEN_HYP_VECS_SZ,
> - PAGE_HYP_EXEC);
> - if (IS_ERR_OR_NULL(bp_base))
> - return PTR_ERR(bp_base);
> + ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
> + PAGE_HYP_EXEC, &bp_base);
> + if (ret)
> + return ret;
>
> - __hyp_bp_vect_base = bp_base;
> + __hyp_bp_vect_base = (void *)bp_base;
>
> return 0;
> }
Thanks,
M.
--
Without deviation from the norm, progress is not possible.