2021-07-15 17:03:14

by Marc Zyngier

[permalink] [raw]
Subject: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

Introduce the infrastructure required to identify an IPA region
that is expected to be used as an MMIO window.

This include mapping, unmapping and checking the regions. Nothing
calls into it yet, so no expected functional change.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_host.h | 2 +
arch/arm64/include/asm/kvm_mmu.h | 5 ++
arch/arm64/kvm/mmu.c | 115 ++++++++++++++++++++++++++++++
3 files changed, 122 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4add6c27251f..914c1b7bb3ad 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -125,6 +125,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
/* Memory Tagging Extension enabled for the guest */
#define KVM_ARCH_FLAG_MTE_ENABLED 1
+ /* Gues has bought into the MMIO guard extension */
+#define KVM_ARCH_FLAG_MMIO_GUARD 2
unsigned long flags;

/*
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b52c5c4b9a3d..f6b8fc1671b3 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -170,6 +170,11 @@ phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(u32 *hyp_va_bits);

+/* MMIO guard */
+bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
+bool kvm_remove_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
+bool kvm_check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
+
static inline void *__kvm_vector_slot2addr(void *base,
enum arm64_hyp_spectre_vector slot)
{
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3155c9e778f0..638827c8842b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1120,6 +1120,121 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
kvm_set_pfn_accessed(pte_pfn(pte));
}

+#define MMIO_NOTE ('M' << 24 | 'M' << 16 | 'I' << 8 | '0')
+
+bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
+{
+ struct kvm_mmu_memory_cache *memcache;
+ struct kvm_memory_slot *memslot;
+ int ret, idx;
+
+ if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
+ return false;
+
+ /* Must be page-aligned */
+ if (ipa & ~PAGE_MASK)
+ return false;
+
+ /*
+ * The page cannot be in a memslot. At some point, this will
+ * have to deal with device mappings though.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ memslot = gfn_to_memslot(vcpu->kvm, ipa >> PAGE_SHIFT);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (memslot)
+ return false;
+
+ /* Guest has direct access to the GICv2 virtual CPU interface */
+ if (irqchip_in_kernel(vcpu->kvm) &&
+ vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
+ ipa == vcpu->kvm->arch.vgic.vgic_cpu_base)
+ return true;
+
+ memcache = &vcpu->arch.mmu_page_cache;
+ if (kvm_mmu_topup_memory_cache(memcache,
+ kvm_mmu_cache_min_pages(vcpu->kvm)))
+ return false;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+ ret = kvm_pgtable_stage2_annotate(vcpu->arch.hw_mmu->pgt,
+ ipa, PAGE_SIZE, memcache,
+ MMIO_NOTE);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ return ret == 0;
+}
+
+struct s2_walk_data {
+ kvm_pte_t pteval;
+ u32 level;
+};
+
+static int s2_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ struct s2_walk_data *data = arg;
+
+ data->level = level;
+ data->pteval = *ptep;
+ return 0;
+}
+
+/* Assumes mmu_lock taken */
+static bool __check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
+{
+ struct s2_walk_data data;
+ struct kvm_pgtable_walker walker = {
+ .cb = s2_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ .arg = &data,
+ };
+
+ kvm_pgtable_walk(vcpu->arch.hw_mmu->pgt, ALIGN_DOWN(ipa, PAGE_SIZE),
+ PAGE_SIZE, &walker);
+
+ /* Must be a PAGE_SIZE mapping with our annotation */
+ return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(data.level)) == PAGE_SIZE &&
+ data.pteval == MMIO_NOTE);
+}
+
+bool kvm_remove_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
+{
+ bool ret;
+
+ if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
+ return false;
+
+ /* Keep the PT locked across the two walks */
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ ret = __check_ioguard_page(vcpu, ipa);
+ if (ret) /* Drop the annotation */
+ kvm_pgtable_stage2_unmap(vcpu->arch.hw_mmu->pgt,
+ ALIGN_DOWN(ipa, PAGE_SIZE), PAGE_SIZE);
+
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return ret;
+}
+
+bool kvm_check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
+{
+ bool ret;
+
+ if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
+ return true;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+ ret = __check_ioguard_page(vcpu, ipa & PAGE_MASK);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ if (!ret)
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
+ return ret;
+}
+
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
--
2.30.2


2021-07-20 11:15:15

by Quentin Perret

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Thursday 15 Jul 2021 at 17:31:47 (+0100), Marc Zyngier wrote:
> +struct s2_walk_data {
> + kvm_pte_t pteval;
> + u32 level;
> +};
> +
> +static int s2_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> + enum kvm_pgtable_walk_flags flag, void * const arg)
> +{
> + struct s2_walk_data *data = arg;
> +
> + data->level = level;
> + data->pteval = *ptep;
> + return 0;
> +}
> +
> +/* Assumes mmu_lock taken */
> +static bool __check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> +{
> + struct s2_walk_data data;
> + struct kvm_pgtable_walker walker = {
> + .cb = s2_walker,
> + .flags = KVM_PGTABLE_WALK_LEAF,
> + .arg = &data,
> + };
> +
> + kvm_pgtable_walk(vcpu->arch.hw_mmu->pgt, ALIGN_DOWN(ipa, PAGE_SIZE),
> + PAGE_SIZE, &walker);
> +
> + /* Must be a PAGE_SIZE mapping with our annotation */
> + return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(data.level)) == PAGE_SIZE &&
> + data.pteval == MMIO_NOTE);

Nit: you could do this check in the walker directly and check the return
value of kvm_pgtable_walk() instead. That would allow to get rid of
struct s2_walk_data.

Also, though the compiler might be able to optimize, maybe simplify the
level check to level == (KVM_PGTABLE_MAX_LEVELS - 1)?

Thanks,
Quentin

> +}

2021-07-20 13:32:08

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Tue, 20 Jul 2021 12:13:20 +0100,
Quentin Perret <[email protected]> wrote:
>
> On Thursday 15 Jul 2021 at 17:31:47 (+0100), Marc Zyngier wrote:
> > +struct s2_walk_data {
> > + kvm_pte_t pteval;
> > + u32 level;
> > +};
> > +
> > +static int s2_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> > + enum kvm_pgtable_walk_flags flag, void * const arg)
> > +{
> > + struct s2_walk_data *data = arg;
> > +
> > + data->level = level;
> > + data->pteval = *ptep;
> > + return 0;
> > +}
> > +
> > +/* Assumes mmu_lock taken */
> > +static bool __check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> > +{
> > + struct s2_walk_data data;
> > + struct kvm_pgtable_walker walker = {
> > + .cb = s2_walker,
> > + .flags = KVM_PGTABLE_WALK_LEAF,
> > + .arg = &data,
> > + };
> > +
> > + kvm_pgtable_walk(vcpu->arch.hw_mmu->pgt, ALIGN_DOWN(ipa, PAGE_SIZE),
> > + PAGE_SIZE, &walker);
> > +
> > + /* Must be a PAGE_SIZE mapping with our annotation */
> > + return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(data.level)) == PAGE_SIZE &&
> > + data.pteval == MMIO_NOTE);
>
> Nit: you could do this check in the walker directly and check the return
> value of kvm_pgtable_walk() instead. That would allow to get rid of
> struct s2_walk_data.
>
> Also, though the compiler might be able to optimize, maybe simplify the
> level check to level == (KVM_PGTABLE_MAX_LEVELS - 1)?

Yup, all good points. I guess I could do the same in my other series
that parses the userspace PT to extract the level.

Thanks,

M.

--
Without deviation from the norm, progress is not possible.

2021-07-20 15:59:41

by Quentin Perret

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Tuesday 20 Jul 2021 at 14:15:56 (+0100), Marc Zyngier wrote:
> On Tue, 20 Jul 2021 12:13:20 +0100,
> Quentin Perret <[email protected]> wrote:
> >
> > On Thursday 15 Jul 2021 at 17:31:47 (+0100), Marc Zyngier wrote:
> > > +struct s2_walk_data {
> > > + kvm_pte_t pteval;
> > > + u32 level;
> > > +};
> > > +
> > > +static int s2_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> > > + enum kvm_pgtable_walk_flags flag, void * const arg)
> > > +{
> > > + struct s2_walk_data *data = arg;
> > > +
> > > + data->level = level;
> > > + data->pteval = *ptep;
> > > + return 0;
> > > +}
> > > +
> > > +/* Assumes mmu_lock taken */
> > > +static bool __check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> > > +{
> > > + struct s2_walk_data data;
> > > + struct kvm_pgtable_walker walker = {
> > > + .cb = s2_walker,
> > > + .flags = KVM_PGTABLE_WALK_LEAF,
> > > + .arg = &data,
> > > + };
> > > +
> > > + kvm_pgtable_walk(vcpu->arch.hw_mmu->pgt, ALIGN_DOWN(ipa, PAGE_SIZE),
> > > + PAGE_SIZE, &walker);
> > > +
> > > + /* Must be a PAGE_SIZE mapping with our annotation */
> > > + return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(data.level)) == PAGE_SIZE &&
> > > + data.pteval == MMIO_NOTE);
> >
> > Nit: you could do this check in the walker directly and check the return
> > value of kvm_pgtable_walk() instead. That would allow to get rid of
> > struct s2_walk_data.
> >
> > Also, though the compiler might be able to optimize, maybe simplify the
> > level check to level == (KVM_PGTABLE_MAX_LEVELS - 1)?
>
> Yup, all good points. I guess I could do the same in my other series
> that parses the userspace PT to extract the level.

Well, actually, let me take that back. I think something like you have
would be useful, but in pgtable.c directly and re-usable for stage-1 and
stage-2 walks. Maybe something like the below (totally untested)?

I could use such a walker in several places as well in the memory
ownership series:

- following the idea of [1], I could remove the
kvm_pgtable_stage2_find_range() function entirely;

- [2] defines 2 custom walkers that do nothing but walk host stage-2
and hyp stage-1 page-tables to check permissions and such -- they
could be removed/re-implemented easily as well.

And you seem to need something similar here, so clearly there is a need.
WDYT?

Thanks,
Quentin

[1] https://lore.kernel.org/kvmarm/[email protected]/
[2] https://lore.kernel.org/kvmarm/[email protected]/

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index e0ae57dca827..bd6d26f27e1a 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -357,6 +357,38 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
return _kvm_pgtable_walk(&walk_data);
}

+struct get_leaf_data {
+ kvm_pte_t *ptep;
+ u32 *level;
+};
+
+static int get_leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ struct get_leaf_data *data = arg;
+
+ *(data->ptep) = *ptep;
+ *(data->level) = level;
+
+ return 0;
+}
+
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep,
+ u32 *level)
+{
+ struct get_leaf_data data = {
+ .ptep = ptep,
+ .level = level,
+ };
+ struct kvm_pgtable_walker __get_leaf_walker = {
+ .cb = get_leaf_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ .arg = &data,
+ };
+
+ return kvm_pgtable_walk(pgt, addr, PAGE_SIZE, &__get_leaf_walker);
+}
+
struct hyp_map_data {
u64 phys;
kvm_pte_t attr;

2021-07-22 18:06:36

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Tue, 20 Jul 2021 16:49:45 +0100,
Quentin Perret <[email protected]> wrote:
>
> On Tuesday 20 Jul 2021 at 14:15:56 (+0100), Marc Zyngier wrote:
> > On Tue, 20 Jul 2021 12:13:20 +0100,
> > Quentin Perret <[email protected]> wrote:
> > >
> > > On Thursday 15 Jul 2021 at 17:31:47 (+0100), Marc Zyngier wrote:
> > > > +struct s2_walk_data {
> > > > + kvm_pte_t pteval;
> > > > + u32 level;
> > > > +};
> > > > +
> > > > +static int s2_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> > > > + enum kvm_pgtable_walk_flags flag, void * const arg)
> > > > +{
> > > > + struct s2_walk_data *data = arg;
> > > > +
> > > > + data->level = level;
> > > > + data->pteval = *ptep;
> > > > + return 0;
> > > > +}
> > > > +
> > > > +/* Assumes mmu_lock taken */
> > > > +static bool __check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> > > > +{
> > > > + struct s2_walk_data data;
> > > > + struct kvm_pgtable_walker walker = {
> > > > + .cb = s2_walker,
> > > > + .flags = KVM_PGTABLE_WALK_LEAF,
> > > > + .arg = &data,
> > > > + };
> > > > +
> > > > + kvm_pgtable_walk(vcpu->arch.hw_mmu->pgt, ALIGN_DOWN(ipa, PAGE_SIZE),
> > > > + PAGE_SIZE, &walker);
> > > > +
> > > > + /* Must be a PAGE_SIZE mapping with our annotation */
> > > > + return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(data.level)) == PAGE_SIZE &&
> > > > + data.pteval == MMIO_NOTE);
> > >
> > > Nit: you could do this check in the walker directly and check the return
> > > value of kvm_pgtable_walk() instead. That would allow to get rid of
> > > struct s2_walk_data.
> > >
> > > Also, though the compiler might be able to optimize, maybe simplify the
> > > level check to level == (KVM_PGTABLE_MAX_LEVELS - 1)?
> >
> > Yup, all good points. I guess I could do the same in my other series
> > that parses the userspace PT to extract the level.
>
> Well, actually, let me take that back. I think something like you have
> would be useful, but in pgtable.c directly and re-usable for stage-1 and
> stage-2 walks. Maybe something like the below (totally untested)?
>
> I could use such a walker in several places as well in the memory
> ownership series:
>
> - following the idea of [1], I could remove the
> kvm_pgtable_stage2_find_range() function entirely;
>
> - [2] defines 2 custom walkers that do nothing but walk host stage-2
> and hyp stage-1 page-tables to check permissions and such -- they
> could be removed/re-implemented easily as well.
>
> And you seem to need something similar here, so clearly there is a need.
> WDYT?

So FWIW, I've now pushed out an updated series for the THP changes[1],
and you will find a similar patch at the base of the branch. Please
have a look and let me know what you think!

Thanks,

M.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=kvm-arm64/mmu/mapping-levels

--
Without deviation from the norm, progress is not possible.

2021-07-23 10:18:07

by Quentin Perret

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Thursday 22 Jul 2021 at 19:04:59 (+0100), Marc Zyngier wrote:
> So FWIW, I've now pushed out an updated series for the THP changes[1],
> and you will find a similar patch at the base of the branch. Please
> have a look and let me know what you think!

I Like the look of it! I'll pull this patch in my series and rebase on
top -- that should introduce three new users or so, and allow a few nice
cleanups.

Thanks!
Quentin

2021-07-27 18:13:31

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Thu, Jul 15, 2021 at 05:31:47PM +0100, Marc Zyngier wrote:
> Introduce the infrastructure required to identify an IPA region
> that is expected to be used as an MMIO window.
>
> This include mapping, unmapping and checking the regions. Nothing
> calls into it yet, so no expected functional change.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/include/asm/kvm_host.h | 2 +
> arch/arm64/include/asm/kvm_mmu.h | 5 ++
> arch/arm64/kvm/mmu.c | 115 ++++++++++++++++++++++++++++++
> 3 files changed, 122 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 4add6c27251f..914c1b7bb3ad 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -125,6 +125,8 @@ struct kvm_arch {
> #define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
> /* Memory Tagging Extension enabled for the guest */
> #define KVM_ARCH_FLAG_MTE_ENABLED 1
> + /* Gues has bought into the MMIO guard extension */
> +#define KVM_ARCH_FLAG_MMIO_GUARD 2
> unsigned long flags;
>
> /*
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index b52c5c4b9a3d..f6b8fc1671b3 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -170,6 +170,11 @@ phys_addr_t kvm_mmu_get_httbr(void);
> phys_addr_t kvm_get_idmap_vector(void);
> int kvm_mmu_init(u32 *hyp_va_bits);
>
> +/* MMIO guard */
> +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> +bool kvm_remove_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> +bool kvm_check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> +
> static inline void *__kvm_vector_slot2addr(void *base,
> enum arm64_hyp_spectre_vector slot)
> {
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 3155c9e778f0..638827c8842b 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1120,6 +1120,121 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
> kvm_set_pfn_accessed(pte_pfn(pte));
> }
>
> +#define MMIO_NOTE ('M' << 24 | 'M' << 16 | 'I' << 8 | '0')

Although this made me smile, maybe we should carve up the bit space a bit
more carefully ;) Also, you know somebody clever will "fix" that typo to
'O'!

Quentin, as the other user of this stuff at the moment, how do you see the
annotation space being allocated? Feels like we should have some 'type'
bits which decide how to parse the rest of the entry.

> +
> +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> +{
> + struct kvm_mmu_memory_cache *memcache;
> + struct kvm_memory_slot *memslot;
> + int ret, idx;
> +
> + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
> + return false;
> +
> + /* Must be page-aligned */
> + if (ipa & ~PAGE_MASK)
> + return false;
> +
> + /*
> + * The page cannot be in a memslot. At some point, this will
> + * have to deal with device mappings though.
> + */
> + idx = srcu_read_lock(&vcpu->kvm->srcu);
> + memslot = gfn_to_memslot(vcpu->kvm, ipa >> PAGE_SHIFT);
> + srcu_read_unlock(&vcpu->kvm->srcu, idx);

What does this memslot check achieve? A new memslot could be added after
you've checked, no?

> +/* Assumes mmu_lock taken */

You can use a lockdep assertion for that!

Will

2021-07-28 09:59:34

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Tue, 27 Jul 2021 19:11:08 +0100,
Will Deacon <[email protected]> wrote:
>
> On Thu, Jul 15, 2021 at 05:31:47PM +0100, Marc Zyngier wrote:
> > Introduce the infrastructure required to identify an IPA region
> > that is expected to be used as an MMIO window.
> >
> > This include mapping, unmapping and checking the regions. Nothing
> > calls into it yet, so no expected functional change.
> >
> > Signed-off-by: Marc Zyngier <[email protected]>
> > ---
> > arch/arm64/include/asm/kvm_host.h | 2 +
> > arch/arm64/include/asm/kvm_mmu.h | 5 ++
> > arch/arm64/kvm/mmu.c | 115 ++++++++++++++++++++++++++++++
> > 3 files changed, 122 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 4add6c27251f..914c1b7bb3ad 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -125,6 +125,8 @@ struct kvm_arch {
> > #define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
> > /* Memory Tagging Extension enabled for the guest */
> > #define KVM_ARCH_FLAG_MTE_ENABLED 1
> > + /* Gues has bought into the MMIO guard extension */
> > +#define KVM_ARCH_FLAG_MMIO_GUARD 2
> > unsigned long flags;
> >
> > /*
> > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> > index b52c5c4b9a3d..f6b8fc1671b3 100644
> > --- a/arch/arm64/include/asm/kvm_mmu.h
> > +++ b/arch/arm64/include/asm/kvm_mmu.h
> > @@ -170,6 +170,11 @@ phys_addr_t kvm_mmu_get_httbr(void);
> > phys_addr_t kvm_get_idmap_vector(void);
> > int kvm_mmu_init(u32 *hyp_va_bits);
> >
> > +/* MMIO guard */
> > +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> > +bool kvm_remove_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> > +bool kvm_check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> > +
> > static inline void *__kvm_vector_slot2addr(void *base,
> > enum arm64_hyp_spectre_vector slot)
> > {
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 3155c9e778f0..638827c8842b 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -1120,6 +1120,121 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
> > kvm_set_pfn_accessed(pte_pfn(pte));
> > }
> >
> > +#define MMIO_NOTE ('M' << 24 | 'M' << 16 | 'I' << 8 | '0')
>
> Although this made me smile, maybe we should carve up the bit space a bit
> more carefully ;) Also, you know somebody clever will "fix" that typo to
> 'O'!

They'll get to keep the pieces when the whole thing breaks!

More seriously, happy to have a more elaborate allocation scheme. For
the purpose of this series, it really doesn't matter.

> Quentin, as the other user of this stuff at the moment, how do you see the
> annotation space being allocated? Feels like we should have some 'type'
> bits which decide how to parse the rest of the entry.
>
> > +
> > +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> > +{
> > + struct kvm_mmu_memory_cache *memcache;
> > + struct kvm_memory_slot *memslot;
> > + int ret, idx;
> > +
> > + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
> > + return false;
> > +
> > + /* Must be page-aligned */
> > + if (ipa & ~PAGE_MASK)
> > + return false;
> > +
> > + /*
> > + * The page cannot be in a memslot. At some point, this will
> > + * have to deal with device mappings though.
> > + */
> > + idx = srcu_read_lock(&vcpu->kvm->srcu);
> > + memslot = gfn_to_memslot(vcpu->kvm, ipa >> PAGE_SHIFT);
> > + srcu_read_unlock(&vcpu->kvm->srcu, idx);
>
> What does this memslot check achieve? A new memslot could be added after
> you've checked, no?

If you start allowing S2 annotations to coexist with potential memory
mappings, you're in for trouble. The faulting logic will happily
overwrite the annotation, and that's probably not what you want.

As for new (or moving) memslots, I guess they should be checked
against existing annotations.

>
> > +/* Assumes mmu_lock taken */
>
> You can use a lockdep assertion for that!

Sure.

Thanks,

M.

--
Without deviation from the norm, progress is not possible.

2021-07-30 12:28:29

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Wed, Jul 28, 2021 at 10:57:30AM +0100, Marc Zyngier wrote:
> On Tue, 27 Jul 2021 19:11:08 +0100,
> Will Deacon <[email protected]> wrote:
> > On Thu, Jul 15, 2021 at 05:31:47PM +0100, Marc Zyngier wrote:
> > > +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> > > +{
> > > + struct kvm_mmu_memory_cache *memcache;
> > > + struct kvm_memory_slot *memslot;
> > > + int ret, idx;
> > > +
> > > + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
> > > + return false;
> > > +
> > > + /* Must be page-aligned */
> > > + if (ipa & ~PAGE_MASK)
> > > + return false;
> > > +
> > > + /*
> > > + * The page cannot be in a memslot. At some point, this will
> > > + * have to deal with device mappings though.
> > > + */
> > > + idx = srcu_read_lock(&vcpu->kvm->srcu);
> > > + memslot = gfn_to_memslot(vcpu->kvm, ipa >> PAGE_SHIFT);
> > > + srcu_read_unlock(&vcpu->kvm->srcu, idx);
> >
> > What does this memslot check achieve? A new memslot could be added after
> > you've checked, no?
>
> If you start allowing S2 annotations to coexist with potential memory
> mappings, you're in for trouble. The faulting logic will happily
> overwrite the annotation, and that's probably not what you want.

I don't disagree, but the check above appears to be racy.

> As for new (or moving) memslots, I guess they should be checked
> against existing annotations.

Something like that, but the devil is in the details as it will need to
synchronize with this check somehow.

Will

2021-07-30 12:59:49

by Quentin Perret

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Tuesday 27 Jul 2021 at 19:11:08 (+0100), Will Deacon wrote:
> On Thu, Jul 15, 2021 at 05:31:47PM +0100, Marc Zyngier wrote:
> > Introduce the infrastructure required to identify an IPA region
> > that is expected to be used as an MMIO window.
> >
> > This include mapping, unmapping and checking the regions. Nothing
> > calls into it yet, so no expected functional change.
> >
> > Signed-off-by: Marc Zyngier <[email protected]>
> > ---
> > arch/arm64/include/asm/kvm_host.h | 2 +
> > arch/arm64/include/asm/kvm_mmu.h | 5 ++
> > arch/arm64/kvm/mmu.c | 115 ++++++++++++++++++++++++++++++
> > 3 files changed, 122 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 4add6c27251f..914c1b7bb3ad 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -125,6 +125,8 @@ struct kvm_arch {
> > #define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
> > /* Memory Tagging Extension enabled for the guest */
> > #define KVM_ARCH_FLAG_MTE_ENABLED 1
> > + /* Gues has bought into the MMIO guard extension */
> > +#define KVM_ARCH_FLAG_MMIO_GUARD 2
> > unsigned long flags;
> >
> > /*
> > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> > index b52c5c4b9a3d..f6b8fc1671b3 100644
> > --- a/arch/arm64/include/asm/kvm_mmu.h
> > +++ b/arch/arm64/include/asm/kvm_mmu.h
> > @@ -170,6 +170,11 @@ phys_addr_t kvm_mmu_get_httbr(void);
> > phys_addr_t kvm_get_idmap_vector(void);
> > int kvm_mmu_init(u32 *hyp_va_bits);
> >
> > +/* MMIO guard */
> > +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> > +bool kvm_remove_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> > +bool kvm_check_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa);
> > +
> > static inline void *__kvm_vector_slot2addr(void *base,
> > enum arm64_hyp_spectre_vector slot)
> > {
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 3155c9e778f0..638827c8842b 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -1120,6 +1120,121 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
> > kvm_set_pfn_accessed(pte_pfn(pte));
> > }
> >
> > +#define MMIO_NOTE ('M' << 24 | 'M' << 16 | 'I' << 8 | '0')
>
> Although this made me smile, maybe we should carve up the bit space a bit
> more carefully ;) Also, you know somebody clever will "fix" that typo to
> 'O'!
>
> Quentin, as the other user of this stuff at the moment, how do you see the
> annotation space being allocated? Feels like we should have some 'type'
> bits which decide how to parse the rest of the entry.

Yes, that's probably worth doing. I've been thinking about using fancy
data structures with bitfields and such, but none of that really seems
to bit a simpler approach. So how about we make the annotate() function
accept two arguments instead of one: an 'enum kvm_pte_inval_type type'
and 'u64 payload', and then we provide a static inline helper in
pgtable.h to unpack an invalid PTE into type/payload members? I'd guess
7 bits for the type should be more than enough and the payload can use
the rest.

Thoughts?

Thanks,
Quentin

2021-07-30 13:07:08

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 04/16] KVM: arm64: Add MMIO checking infrastructure

On Fri, 30 Jul 2021 13:26:59 +0100,
Will Deacon <[email protected]> wrote:
>
> On Wed, Jul 28, 2021 at 10:57:30AM +0100, Marc Zyngier wrote:
> > On Tue, 27 Jul 2021 19:11:08 +0100,
> > Will Deacon <[email protected]> wrote:
> > > On Thu, Jul 15, 2021 at 05:31:47PM +0100, Marc Zyngier wrote:
> > > > +bool kvm_install_ioguard_page(struct kvm_vcpu *vcpu, gpa_t ipa)
> > > > +{
> > > > + struct kvm_mmu_memory_cache *memcache;
> > > > + struct kvm_memory_slot *memslot;
> > > > + int ret, idx;
> > > > +
> > > > + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags))
> > > > + return false;
> > > > +
> > > > + /* Must be page-aligned */
> > > > + if (ipa & ~PAGE_MASK)
> > > > + return false;
> > > > +
> > > > + /*
> > > > + * The page cannot be in a memslot. At some point, this will
> > > > + * have to deal with device mappings though.
> > > > + */
> > > > + idx = srcu_read_lock(&vcpu->kvm->srcu);
> > > > + memslot = gfn_to_memslot(vcpu->kvm, ipa >> PAGE_SHIFT);
> > > > + srcu_read_unlock(&vcpu->kvm->srcu, idx);
> > >
> > > What does this memslot check achieve? A new memslot could be added after
> > > you've checked, no?
> >
> > If you start allowing S2 annotations to coexist with potential memory
> > mappings, you're in for trouble. The faulting logic will happily
> > overwrite the annotation, and that's probably not what you want.
>
> I don't disagree, but the check above appears to be racy.

Yup, the srcu_read_unlock() should be moved at the end of this
function. It's rather silly as it is currently written...

>
> > As for new (or moving) memslots, I guess they should be checked
> > against existing annotations.
>
> Something like that, but the devil is in the details as it will need to
> synchronize with this check somehow.

The SRCU read lock should protect us against memslots being removed
whilst we're accessing it. In a way, this is no different from taking
a page fault.

For new memslots, it is a lot less clear. There are multiple levels of
locking, more or less documented... It feels like slots_arch_lock is
the right tool for this job, but I need to page all that stuff in...

M.

--
Without deviation from the norm, progress is not possible.