2021-03-17 14:19:11

by Quentin Perret

Subject: [PATCH 1/2] KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB Stage-2 flag

In order to further configure stage-2 page-tables, pass flags to the
init function using a new enum.

The first of these flags allows FWB to be disabled even if the hardware
supports it, as we will need to do so for the host stage-2.

Signed-off-by: Quentin Perret <[email protected]>

---

One question is, do we want to use stage2_has_fwb() everywhere, including
guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?

That'd make this patch more intrusive, but would make the whole codebase
work with FWB enabled on a guest by guest basis. I don't see us use that
anytime soon (other than maybe debug of some sort?) but it'd be good to
have an agreement.
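
For context, a rough sketch of how the host stage-2 (patch 2/2) is expected
to use the new flag. The call site and names below are assumptions for the
sake of the example, not part of this patch:

	/* hypothetical host stage-2 setup path, names assumed */
	ret = kvm_pgtable_stage2_init_flags(&host_pgt, &host_kvm.arch,
					    &host_mm_ops, KVM_PGTABLE_S2_NOFWB);
	if (ret)
		return ret;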
---
arch/arm64/include/asm/kvm_pgtable.h | 19 +++++++++--
arch/arm64/include/asm/pgtable-prot.h | 4 +--
arch/arm64/kvm/hyp/pgtable.c | 49 +++++++++++++++++----------
3 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index b93a2a3526ab..7382bdfb6284 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
phys_addr_t (*virt_to_phys)(void *addr);
};

+/**
+ * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
+ * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
+ * ARM64_HAS_STAGE2_FWB.
+ */
+enum kvm_pgtable_stage2_flags {
+ KVM_PGTABLE_S2_NOFWB = BIT(0),
+};
+
/**
* struct kvm_pgtable - KVM page-table.
* @ia_bits: Maximum input address size, in bits.
@@ -72,6 +81,7 @@ struct kvm_pgtable {

/* Stage-2 only */
struct kvm_s2_mmu *mmu;
+ enum kvm_pgtable_stage2_flags flags;
};

/**
@@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
* @arch: Arch-specific KVM structure representing the guest virtual
* machine.
* @mm_ops: Memory management callbacks.
+ * @flags: Stage-2 configuration flags.
*
* Return: 0 on success, negative error code on failure.
*/
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
- struct kvm_pgtable_mm_ops *mm_ops);
+int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+ struct kvm_pgtable_mm_ops *mm_ops,
+ enum kvm_pgtable_stage2_flags flags);
+
+#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
+ kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)

/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 046be789fbb4..beeb722a82d3 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)

-#define PAGE_S2_MEMATTR(attr) \
+#define PAGE_S2_MEMATTR(attr, has_fwb) \
({ \
u64 __val; \
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
+ if (has_fwb) \
__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
else \
__val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3a971df278bd..dee8aaeaf13e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -507,12 +507,25 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
return vtcr;
}

-static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
+static bool stage2_has_fwb(struct kvm_pgtable *pgt)
+{
+ if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ return false;
+
+ return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
+}
+
+static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep,
+ struct kvm_pgtable *pgt)
{
bool device = prot & KVM_PGTABLE_PROT_DEVICE;
- kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
- PAGE_S2_MEMATTR(NORMAL);
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+ kvm_pte_t attr;
+
+ if (device)
+ attr = PAGE_S2_MEMATTR(DEVICE_nGnRE, stage2_has_fwb(pgt));
+ else
+ attr = PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));

if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
@@ -748,7 +761,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
.arg = &map_data,
};

- ret = stage2_set_prot_attr(prot, &map_data.attr);
+ ret = stage2_set_prot_attr(prot, &map_data.attr, pgt);
if (ret)
return ret;

@@ -786,16 +799,13 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,

static void stage2_flush_dcache(void *addr, u64 size)
{
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
- return;
-
__flush_dcache_area(addr, size);
}

-static bool stage2_pte_cacheable(kvm_pte_t pte)
+static bool stage2_pte_cacheable(kvm_pte_t pte, struct kvm_pgtable *pgt)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == PAGE_S2_MEMATTR(NORMAL);
+ return memattr == PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -821,8 +831,8 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,

if (mm_ops->page_count(childp) != 1)
return 0;
- } else if (stage2_pte_cacheable(pte)) {
- need_flush = true;
+ } else if (stage2_pte_cacheable(pte, pgt)) {
+ need_flush = !stage2_has_fwb(pgt);
}

/*
@@ -979,10 +989,11 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
- struct kvm_pgtable_mm_ops *mm_ops = arg;
+ struct kvm_pgtable *pgt = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;

- if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
+ if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte, pgt))
return 0;

stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
@@ -994,17 +1005,18 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
struct kvm_pgtable_walker walker = {
.cb = stage2_flush_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
- .arg = pgt->mm_ops,
+ .arg = pgt,
};

- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ if (stage2_has_fwb(pgt))
return 0;

return kvm_pgtable_walk(pgt, addr, size, &walker);
}

-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
- struct kvm_pgtable_mm_ops *mm_ops)
+int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+ struct kvm_pgtable_mm_ops *mm_ops,
+ enum kvm_pgtable_stage2_flags flags)
{
size_t pgd_sz;
u64 vtcr = arch->vtcr;
@@ -1017,6 +1029,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
if (!pgt->pgd)
return -ENOMEM;

+ pgt->flags = flags;
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
@@ -1101,7 +1114,7 @@ int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
u32 level;
int ret;

- ret = stage2_set_prot_attr(prot, &attr);
+ ret = stage2_set_prot_attr(prot, &attr, pgt);
if (ret)
return ret;
attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
--
2.31.0.rc2.261.g7f71774620-goog


2021-03-17 14:43:41

by Marc Zyngier

Subject: Re: [PATCH 1/2] KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB Stage-2 flag

Hi Quentin,

On Wed, 17 Mar 2021 14:17:13 +0000,
Quentin Perret <[email protected]> wrote:
>
> In order to further configure stage-2 page-tables, pass flags to the
> init function using a new enum.
>
> The first of these flags allows FWB to be disabled even if the hardware
> supports it, as we will need to do so for the host stage-2.
>
> Signed-off-by: Quentin Perret <[email protected]>
>
> ---
>
> One question is, do we want to use stage2_has_fwb() everywhere, including
> guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
>
> That'd make this patch more intrusive, but would make the whole codebase
> work with FWB enabled on a guest by guest basis. I don't see us use that
> anytime soon (other than maybe debug of some sort?) but it'd be good to
> have an agreement.

I'm not sure how useful that would be. We fought long and hard to get
FWB, and I can't see a good reason to disable it for guests unless the
HW was buggy (in which case that'd apply to everyone). I'd rather
keep the changes small for now (this whole series is invasive
enough!).

As for this patch, I only have a few cosmetic comments:

> ---
> arch/arm64/include/asm/kvm_pgtable.h | 19 +++++++++--
> arch/arm64/include/asm/pgtable-prot.h | 4 +--
> arch/arm64/kvm/hyp/pgtable.c | 49 +++++++++++++++++----------
> 3 files changed, 50 insertions(+), 22 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index b93a2a3526ab..7382bdfb6284 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
> phys_addr_t (*virt_to_phys)(void *addr);
> };
>
> +/**
> + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
> + * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
> + * ARM64_HAS_STAGE2_FWB.
> + */
> +enum kvm_pgtable_stage2_flags {
> + KVM_PGTABLE_S2_NOFWB = BIT(0),
> +};
> +
> /**
> * struct kvm_pgtable - KVM page-table.
> * @ia_bits: Maximum input address size, in bits.
> @@ -72,6 +81,7 @@ struct kvm_pgtable {
>
> /* Stage-2 only */
> struct kvm_s2_mmu *mmu;
> + enum kvm_pgtable_stage2_flags flags;
> };
>
> /**
> @@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
> * @arch: Arch-specific KVM structure representing the guest virtual
> * machine.
> * @mm_ops: Memory management callbacks.
> + * @flags: Stage-2 configuration flags.
> *
> * Return: 0 on success, negative error code on failure.
> */
> -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> - struct kvm_pgtable_mm_ops *mm_ops);
> +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> + struct kvm_pgtable_mm_ops *mm_ops,
> + enum kvm_pgtable_stage2_flags flags);
> +
> +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
> + kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
>
> /**
> * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 046be789fbb4..beeb722a82d3 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
> #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
> #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
>
> -#define PAGE_S2_MEMATTR(attr) \
> +#define PAGE_S2_MEMATTR(attr, has_fwb) \
> ({ \
> u64 __val; \
> - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
> + if (has_fwb) \
> __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
> else \
> __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 3a971df278bd..dee8aaeaf13e 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -507,12 +507,25 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
> return vtcr;
> }
>
> -static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
> +static bool stage2_has_fwb(struct kvm_pgtable *pgt)
> +{
> + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> + return false;
> +
> + return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
> +}
> +
> +static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep,
> + struct kvm_pgtable *pgt)

nit: make pgt the first parameter, as it defines the context in which
the rest applies.

> {
> bool device = prot & KVM_PGTABLE_PROT_DEVICE;
> - kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
> - PAGE_S2_MEMATTR(NORMAL);
> u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
> + kvm_pte_t attr;
> +
> + if (device)
> + attr = PAGE_S2_MEMATTR(DEVICE_nGnRE, stage2_has_fwb(pgt));
> + else
> + attr = PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));

Maybe define a new helper:

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

to avoid the constant stage2_has_fwb() repetition.
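
The call sites above would then read as follows (only a sketch):

	if (device)
		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
	else
		attr = KVM_S2_MEMATTR(pgt, NORMAL);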

>
> if (!(prot & KVM_PGTABLE_PROT_X))
> attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
> @@ -748,7 +761,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
> .arg = &map_data,
> };
>
> - ret = stage2_set_prot_attr(prot, &map_data.attr);
> + ret = stage2_set_prot_attr(prot, &map_data.attr, pgt);
> if (ret)
> return ret;
>
> @@ -786,16 +799,13 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
>
> static void stage2_flush_dcache(void *addr, u64 size)
> {
> - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> - return;
> -
> __flush_dcache_area(addr, size);
> }

Consider dropping the function altogether and use __flush_dcache_area
directly (assuming the prototypes are identical).
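
i.e. at the call sites, something like this (again assuming the size
types agree):

	- stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
	+ __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));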

>
> -static bool stage2_pte_cacheable(kvm_pte_t pte)
> +static bool stage2_pte_cacheable(kvm_pte_t pte, struct kvm_pgtable *pgt)

Same comment about pgt being the first argument.

> {
> u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
> - return memattr == PAGE_S2_MEMATTR(NORMAL);
> + return memattr == PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));
> }
>
> static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> @@ -821,8 +831,8 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>
> if (mm_ops->page_count(childp) != 1)
> return 0;
> - } else if (stage2_pte_cacheable(pte)) {
> - need_flush = true;
> + } else if (stage2_pte_cacheable(pte, pgt)) {
> + need_flush = !stage2_has_fwb(pgt);
> }
>
> /*
> @@ -979,10 +989,11 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> enum kvm_pgtable_walk_flags flag,
> void * const arg)
> {
> - struct kvm_pgtable_mm_ops *mm_ops = arg;
> + struct kvm_pgtable *pgt = arg;
> + struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
> kvm_pte_t pte = *ptep;
>
> - if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
> + if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte, pgt))
> return 0;
>
> stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
> @@ -994,17 +1005,18 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
> struct kvm_pgtable_walker walker = {
> .cb = stage2_flush_walker,
> .flags = KVM_PGTABLE_WALK_LEAF,
> - .arg = pgt->mm_ops,
> + .arg = pgt,
> };
>
> - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> + if (stage2_has_fwb(pgt))
> return 0;
>
> return kvm_pgtable_walk(pgt, addr, size, &walker);
> }
>
> -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> - struct kvm_pgtable_mm_ops *mm_ops)
> +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> + struct kvm_pgtable_mm_ops *mm_ops,
> + enum kvm_pgtable_stage2_flags flags)
> {
> size_t pgd_sz;
> u64 vtcr = arch->vtcr;
> @@ -1017,6 +1029,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> if (!pgt->pgd)
> return -ENOMEM;
>
> + pgt->flags = flags;

Try and keep the initialisation order similar to the definition of the
structure if possible.
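
e.g. something like the below, with flags assigned last to match the struct
layout (the mmu assignment is from the existing function, not visible in
this hunk):

	pgt->ia_bits = ia_bits;
	pgt->start_level = start_level;
	pgt->mm_ops = mm_ops;
	pgt->mmu = &arch->mmu;
	pgt->flags = flags;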

> pgt->ia_bits = ia_bits;
> pgt->start_level = start_level;
> pgt->mm_ops = mm_ops;
> @@ -1101,7 +1114,7 @@ int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
> u32 level;
> int ret;
>
> - ret = stage2_set_prot_attr(prot, &attr);
> + ret = stage2_set_prot_attr(prot, &attr, pgt);
> if (ret)
> return ret;
> attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
> --
> 2.31.0.rc2.261.g7f71774620-goog
>
>

Thanks,

M.

--
Without deviation from the norm, progress is not possible.

2021-03-17 14:44:42

by Will Deacon

Subject: Re: [PATCH 1/2] KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB Stage-2 flag

On Wed, Mar 17, 2021 at 02:17:13PM +0000, Quentin Perret wrote:
> In order to further configure stage-2 page-tables, pass flags to the
> init function using a new enum.
>
> The first of these flags allows FWB to be disabled even if the hardware
> supports it, as we will need to do so for the host stage-2.
>
> Signed-off-by: Quentin Perret <[email protected]>
>
> ---
>
> One question is, do we want to use stage2_has_fwb() everywhere, including
> guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
>
> That'd make this patch more intrusive, but would make the whole codebase
> work with FWB enabled on a guest by guest basis. I don't see us use that
> anytime soon (other than maybe debug of some sort?) but it'd be good to
> have an agreement.

I don't see the value in spreading this everywhere for now.

> arch/arm64/include/asm/kvm_pgtable.h | 19 +++++++++--
> arch/arm64/include/asm/pgtable-prot.h | 4 +--
> arch/arm64/kvm/hyp/pgtable.c | 49 +++++++++++++++++----------
> 3 files changed, 50 insertions(+), 22 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index b93a2a3526ab..7382bdfb6284 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
> phys_addr_t (*virt_to_phys)(void *addr);
> };
>
> +/**
> + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
> + * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
> + * ARM64_HAS_STAGE2_FWB.
> + */
> +enum kvm_pgtable_stage2_flags {
> + KVM_PGTABLE_S2_NOFWB = BIT(0),
> +};
> +
> /**
> * struct kvm_pgtable - KVM page-table.
> * @ia_bits: Maximum input address size, in bits.
> @@ -72,6 +81,7 @@ struct kvm_pgtable {
>
> /* Stage-2 only */
> struct kvm_s2_mmu *mmu;
> + enum kvm_pgtable_stage2_flags flags;
> };
>
> /**
> @@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
> * @arch: Arch-specific KVM structure representing the guest virtual
> * machine.
> * @mm_ops: Memory management callbacks.
> + * @flags: Stage-2 configuration flags.
> *
> * Return: 0 on success, negative error code on failure.
> */
> -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> - struct kvm_pgtable_mm_ops *mm_ops);
> +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> + struct kvm_pgtable_mm_ops *mm_ops,
> + enum kvm_pgtable_stage2_flags flags);
> +
> +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
> + kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)

nit: I think some of the kerneldoc refers to "kvm_pgtable_stage2_init()",
so that needs a trivial update to e.g. "kvm_pgtable_stage2_init*()".

> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 046be789fbb4..beeb722a82d3 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
> #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
> #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
>
> -#define PAGE_S2_MEMATTR(attr) \
> +#define PAGE_S2_MEMATTR(attr, has_fwb) \
> ({ \
> u64 __val; \
> - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
> + if (has_fwb) \
> __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
> else \
> __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \

Can you take the pgt structure instead of a bool here, or does it end up
being really ugly?

Will

2021-03-17 14:51:22

by Quentin Perret

Subject: Re: [PATCH 1/2] KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB Stage-2 flag

On Wednesday 17 Mar 2021 at 14:41:31 (+0000), Marc Zyngier wrote:
> Hi Quentin,
>
> On Wed, 17 Mar 2021 14:17:13 +0000,
> Quentin Perret <[email protected]> wrote:
> >
> > In order to further configure stage-2 page-tables, pass flags to the
> > init function using a new enum.
> >
> > The first of these flags allows FWB to be disabled even if the hardware
> > supports it, as we will need to do so for the host stage-2.
> >
> > Signed-off-by: Quentin Perret <[email protected]>
> >
> > ---
> >
> > One question is, do we want to use stage2_has_fwb() everywhere, including
> > guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
> >
> > That'd make this patch more intrusive, but would make the whole codebase
> > work with FWB enabled on a guest by guest basis. I don't see us use that
> > anytime soon (other than maybe debug of some sort?) but it'd be good to
> > have an agreement.
>
> I'm not sure how useful that would be. We fought long and hard to get
> FWB, and I can't see a good reason to disable it for guests unless the
> HW was buggy (in which case that'd apply to everyone). I'd rather
> keep the changes small for now (this whole series is invasive
> enough!).

OK, that works for me.

> As for this patch, I only have a few cosmetic comments:

Happy with the suggestions, I'll fold that in v6.

Cheers,
Quentin

2021-03-17 15:54:32

by Quentin Perret

Subject: Re: [PATCH 1/2] KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB Stage-2 flag

On Wednesday 17 Mar 2021 at 14:42:46 (+0000), Will Deacon wrote:
> On Wed, Mar 17, 2021 at 02:17:13PM +0000, Quentin Perret wrote:
> > In order to further configure stage-2 page-tables, pass flags to the
> > init function using a new enum.
> >
> > The first of these flags allows FWB to be disabled even if the hardware
> > supports it, as we will need to do so for the host stage-2.
> >
> > Signed-off-by: Quentin Perret <[email protected]>
> >
> > ---
> >
> > One question is, do we want to use stage2_has_fwb() everywhere, including
> > guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
> >
> > That'd make this patch more intrusive, but would make the whole codebase
> > work with FWB enabled on a guest by guest basis. I don't see us use that
> > anytime soon (other than maybe debug of some sort?) but it'd be good to
> > have an agreement.
>
> I don't see the value in spreading this everywhere for now.

Good. Sounds like we're all in agreement.

> > arch/arm64/include/asm/kvm_pgtable.h | 19 +++++++++--
> > arch/arm64/include/asm/pgtable-prot.h | 4 +--
> > arch/arm64/kvm/hyp/pgtable.c | 49 +++++++++++++++++----------
> > 3 files changed, 50 insertions(+), 22 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > index b93a2a3526ab..7382bdfb6284 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
> > phys_addr_t (*virt_to_phys)(void *addr);
> > };
> >
> > +/**
> > + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
> > + * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
> > + * ARM64_HAS_STAGE2_FWB.
> > + */
> > +enum kvm_pgtable_stage2_flags {
> > + KVM_PGTABLE_S2_NOFWB = BIT(0),
> > +};
> > +
> > /**
> > * struct kvm_pgtable - KVM page-table.
> > * @ia_bits: Maximum input address size, in bits.
> > @@ -72,6 +81,7 @@ struct kvm_pgtable {
> >
> > /* Stage-2 only */
> > struct kvm_s2_mmu *mmu;
> > + enum kvm_pgtable_stage2_flags flags;
> > };
> >
> > /**
> > @@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
> > * @arch: Arch-specific KVM structure representing the guest virtual
> > * machine.
> > * @mm_ops: Memory management callbacks.
> > + * @flags: Stage-2 configuration flags.
> > *
> > * Return: 0 on success, negative error code on failure.
> > */
> > -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> > - struct kvm_pgtable_mm_ops *mm_ops);
> > +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> > + struct kvm_pgtable_mm_ops *mm_ops,
> > + enum kvm_pgtable_stage2_flags flags);
> > +
> > +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
> > + kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
>
> nit: I think some of the kerneldoc refers to "kvm_pgtable_stage2_init()",
> so that needs a trivial update to e.g. "kvm_pgtable_stage2_init*()".

Will do.

> > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> > index 046be789fbb4..beeb722a82d3 100644
> > --- a/arch/arm64/include/asm/pgtable-prot.h
> > +++ b/arch/arm64/include/asm/pgtable-prot.h
> > @@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
> > #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
> > #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
> >
> > -#define PAGE_S2_MEMATTR(attr) \
> > +#define PAGE_S2_MEMATTR(attr, has_fwb) \
> > ({ \
> > u64 __val; \
> > - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
> > + if (has_fwb) \
> > __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
> > else \
> > __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
>
> Can you take the pgt structure instead of a bool here, or does it end up
> being really ugly?

It means I need to expose the stage2_has_fwb() helper in pgtable.h so I
can use it here. But Marc suggested that I introduce another macro along
the lines of

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

which can be defined in pgtable.c and keep everything neatly contained
in there. So I think I'll go ahead with that unless you feel strongly
about it.
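
Concretely (just a sketch, assuming stage2_has_fwb() stays static in
pgtable.c), the new macro would sit next to the helper:

	/* arch/arm64/kvm/hyp/pgtable.c */
	#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

and PAGE_S2_MEMATTR() itself keeps taking a plain bool as in this patch.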

Cheers,
Quentin