2023-11-06 07:14:40

by Zhang, Tina

Subject: [RFC PATCH 0/5] virtio-iommu: Add VT-d IO page table

The proposal to let virtio-iommu support page tables is being discussed on
the virtio-comment mailing list[1]. This patch-set, based on Jean's
virtio-iommu/pgtables branch[2], follows that proposal and adds basic
VT-d IO page table support to virtio-iommu.

On Intel platforms with VT-d nested translation enabled, having the
virtual IOMMU support the VT-d IO page table brings two main benefits
(a conceptual sketch of the second one follows this list):
1) Allowing vSVM (aka vSVA) usage. Virtual Shared Virtual Addressing
(vSVA) lets the virtual processor and the virtual device use the
same virtual addresses.
2) Accelerating DMA buffer map operations for the vIOVA usage by removing
the context switch on each DMA buffer map operation.
(Note: this patch-set doesn't include the whole series for enabling
vSVM on virtio-iommu; it only covers the vIOVA case. However, the
vSVM enabling patch-set needs to be based on this patch-set.)
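
For the vIOVA case, the gain comes from the map path becoming a
guest-local page table update rather than a virtqueue round trip to the
host. Here is a minimal conceptual sketch of that fast path (the helper
name and the way the io-pgtable ops pointer is obtained are illustrative,
not part of this series):

#include <linux/io-pgtable.h>

/*
 * Conceptual sketch only. With a VT-d page table attached through the
 * ATTACH_TABLE request, mapping a vIOVA updates the guest-owned
 * first-level page table via the io-pgtable ops; the host IOMMU walks
 * that table through nested translation, so no VIRTIO_IOMMU_T_MAP
 * request (and no VM exit) is needed on the map path.
 */
static int map_via_attached_pgtable(struct io_pgtable_ops *pgtbl_ops,
				    unsigned long iova, phys_addr_t paddr,
				    size_t pgsize, size_t pgcount,
				    int prot, size_t *mapped)
{
	return pgtbl_ops->map_pages(pgtbl_ops, iova, paddr, pgsize, pgcount,
				    prot, GFP_KERNEL, mapped);
}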

This patch-set contains three parts:
1) The first patch is a bug fix that resolves an issue where IOTLB
invalidation requests carry an incorrect page size.
2) The next three patches add generic IO page table support to the
VT-d driver.
3) The last one introduces the VT-d page table format to the
virtio-iommu driver.

The patch-set is also available at github:
https://github.com/TinaZhangZW/linux/tree/vt-d-pgtable

The QEMU part is available here:
https://github.com/TinaZhangZW/qemu/tree/virtio-iommu/vt-d-pgtable


[1]:https://lists.oasis-open.org/archives/virtio-comment/202310/msg00018.html
[2]:https://jpbrucker.net/git/linux/log/?h=virtio-iommu/pgtables

Tina Zhang (5):
iommu/virtio-iommu: Correct the values of granule and nr_pages
iommu/vt-d: Add generic IO page table support
iommu/io-pgtable: Introduce struct vtd_cfg
iommu/vt-d: Adapt alloc_pgtable interface to be used by others
iommu/virtio-iommu: Support attaching VT-d IO pgtable

drivers/iommu/intel/Kconfig | 1 +
drivers/iommu/intel/iommu.c | 157 ++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.h | 7 ++
drivers/iommu/io-pgtable.c | 3 +
drivers/iommu/virtio-iommu.c | 27 ++++-
include/linux/io-pgtable.h | 7 ++
include/uapi/linux/virtio_iommu.h | 26 +++++
7 files changed, 226 insertions(+), 2 deletions(-)

--
2.39.3


2023-11-06 07:14:55

by Zhang, Tina

Subject: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support

Add basic hook-up code to implement the generic IO page table framework.
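
For reference, here is a simplified rendering of how the generic
framework dispatches to the per-format init_fns registered below (based
on the upstream alloc_io_pgtable_ops() in drivers/iommu/io-pgtable.c;
details may differ slightly on Jean's branch):

struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
					    struct io_pgtable_cfg *cfg,
					    void *cookie)
{
	struct io_pgtable *iop;
	const struct io_pgtable_init_fns *fns;

	if (fmt >= IO_PGTABLE_NUM_FMTS)
		return NULL;

	/* the [INTEL_IOMMU] entry is added to this table by this patch */
	fns = io_pgtable_init_table[fmt];
	if (!fns)
		return NULL;

	/* for the VT-d format this ends up in alloc_pgtable() below */
	iop = fns->alloc(cfg, cookie);
	if (!iop)
		return NULL;

	iop->fmt    = fmt;
	iop->cookie = cookie;
	iop->cfg    = *cfg;

	return &iop->ops;
}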

Signed-off-by: Tina Zhang <[email protected]>
---
drivers/iommu/intel/Kconfig | 1 +
drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.h | 7 +++
drivers/iommu/io-pgtable.c | 3 ++
include/linux/io-pgtable.h | 2 +
5 files changed, 107 insertions(+)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..8334e7e50e69 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@ config INTEL_IOMMU
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
+ select IOMMU_IO_PGTABLE
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dbcdf7b95b9f..80bd1993861c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,6 +23,7 @@
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <uapi/linux/iommufd.h>
+#include <linux/io-pgtable.h>

#include "iommu.h"
#include "../dma-iommu.h"
@@ -67,6 +68,20 @@
#define LEVEL_STRIDE (9)
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)

+#define io_pgtable_cfg_to_dmar_pgtable(x) \
+ container_of((x), struct dmar_io_pgtable, pgtbl_cfg)
+
+#define io_pgtable_to_dmar_pgtable(x) \
+ container_of((x), struct dmar_io_pgtable, iop)
+
+#define io_pgtable_to_dmar_domain(x) \
+ container_of(io_pgtable_to_dmar_pgtable(x), \
+ struct dmar_domain, dmar_iop)
+
+#define io_pgtable_ops_to_dmar_domain(x) \
+ container_of(io_pgtable_to_dmar_pgtable(io_pgtable_ops_to_pgtable(x)), \
+ struct dmar_domain, dmar_iop)
+
static inline int agaw_to_level(int agaw)
{
return agaw + 2;
@@ -5171,3 +5186,82 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)

return ret;
}
+
+static void flush_all(void *cookie)
+{
+}
+
+static void flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+}
+
+static const struct iommu_flush_ops flush_ops = {
+ .tlb_flush_all = flush_all,
+ .tlb_flush_walk = flush_walk,
+ .tlb_add_page = add_page,
+};
+
+static void free_pgtable(struct io_pgtable *iop)
+{
+ struct dmar_domain *dmar_domain = io_pgtable_to_dmar_domain(iop);
+
+ if (dmar_domain->pgd) {
+ LIST_HEAD(freelist);
+
+ domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), &freelist);
+ put_pages_list(&freelist);
+ }
+}
+
+static int pgtable_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+ struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+ return intel_iommu_map_pages(&dmar_domain->domain, iova, paddr, pgsize,
+ pgcount, iommu_prot, gfp, mapped);
+}
+
+static size_t pgtable_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
+{
+ struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+ return intel_iommu_unmap_pages(&dmar_domain->domain, iova, pgsize,
+ pgcount, gather);
+}
+
+static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
+ unsigned long iova)
+{
+ struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+ return intel_iommu_iova_to_phys(&dmar_domain->domain, iova);
+}
+
+static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+
+ pgtable->iop.ops.map_pages = pgtable_map_pages;
+ pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
+ pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
+
+ cfg->tlb = &flush_ops;
+
+ return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
+ .alloc = alloc_pgtable,
+ .free = free_pgtable,
+};
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8d0aac71c135..5207fea6477a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -18,6 +18,7 @@
#include <linux/list.h>
#include <linux/iommu.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-pgtable.h>
#include <linux/dmar.h>
#include <linux/bitfield.h>
#include <linux/xarray.h>
@@ -579,6 +580,11 @@ struct iommu_domain_info {
* to VT-d spec, section 9.3 */
};

+struct dmar_io_pgtable {
+ struct io_pgtable_cfg pgtbl_cfg;
+ struct io_pgtable iop;
+};
+
struct dmar_domain {
int nid; /* node id */
struct xarray iommu_array; /* Attached IOMMU array */
@@ -633,6 +639,7 @@ struct dmar_domain {

struct iommu_domain domain; /* generic domain data structure for
iommu core */
+ struct dmar_io_pgtable dmar_iop;
};

/*
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 5755dee96a68..533b27557290 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -35,6 +35,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_VIRT
[VIRT_IO_PGTABLE] = &io_pgtable_virt_init_fns,
#endif
+#ifdef CONFIG_INTEL_IOMMU
+ [INTEL_IOMMU] = &io_pgtable_intel_iommu_init_fns,
+#endif
};

struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index bdcade2c4844..b2857c18f963 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -20,6 +20,7 @@ enum io_pgtable_fmt {
APPLE_DART,
APPLE_DART2,
VIRT_IO_PGTABLE,
+ INTEL_IOMMU,
IO_PGTABLE_NUM_FMTS,
};

@@ -281,5 +282,6 @@ extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
extern struct io_pgtable_init_fns io_pgtable_virt_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns;

#endif /* __IO_PGTABLE_H */
--
2.39.3

2023-11-06 07:14:59

by Zhang, Tina

Subject: [RFC PATCH 3/5] iommu/io-pgtable: Introduce struct vtd_cfg

VT-d hardware cap/ecap information is needed by the driver to generate a
VT-d format IO page table. Add struct vtd_cfg to hold this information.
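
A minimal sketch of how a consumer would record these values before
requesting a VT-d format page table (the helper is hypothetical; in this
series the values ultimately come from the VT-d probe property parsed by
the virtio-iommu driver in patch 5):

#include <linux/io-pgtable.h>

/* Illustrative helper: stash the reported CAP/ECAP register values in
 * the new vtd_cfg member so the VT-d back end can derive address
 * widths, coherency and nesting support from them. */
static void fill_vtd_cfg(struct io_pgtable_cfg *cfg, u64 cap_reg, u64 ecap_reg)
{
	cfg->vtd_cfg.cap_reg  = cap_reg;
	cfg->vtd_cfg.ecap_reg = ecap_reg;
}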

Signed-off-by: Tina Zhang <[email protected]>
---
include/linux/io-pgtable.h | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b2857c18f963..ae6a2e44b027 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,11 @@ struct io_pgtable_cfg {
u32 n_ttbrs;
} apple_dart_cfg;

+ struct {
+ u64 cap_reg;
+ u64 ecap_reg;
+ } vtd_cfg;
+
struct {
dma_addr_t pgd;
} virt;
--
2.39.3

2023-11-06 07:15:02

by Zhang, Tina

Subject: [RFC PATCH 4/5] iommu/vt-d: Adapt alloc_pgtable interface to be used by others

The generic IO page table framework provides a set of interfaces for
invoking IO page table operations. Another entity (e.g., the virtio-iommu
driver) can use these interfaces to ask the VT-d driver to generate a
VT-d format IO page table. This patch adds that support.
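
Below is a sketch of how another entity could use the adapted interface,
assuming it already holds the VT-d CAP/ECAP values (field values are
illustrative; in this series the actual caller is the virtio-iommu
driver, which fills the configuration from the probe property handled in
patch 5):

#include <linux/io-pgtable.h>
#include <linux/sizes.h>

/* Illustration only: request a VT-d format page table through the
 * generic framework. alloc_pgtable() below allocates the dmar_domain
 * and the top-level PGD, then hands back the io_pgtable ops. */
static struct io_pgtable_ops *get_vtd_pgtable_ops(u64 cap_reg, u64 ecap_reg,
						  void *cookie)
{
	struct io_pgtable_cfg cfg = {
		.pgsize_bitmap    = SZ_4K,
		.ias              = 48, /* trimmed by __iommu_calculate_cfg() */
		.oas              = 48,
		.vtd_cfg.cap_reg  = cap_reg,
		.vtd_cfg.ecap_reg = ecap_reg,
	};

	/* returns NULL if the platform lacks nested translation support */
	return alloc_io_pgtable_ops(INTEL_IOMMU, &cfg, cookie);
}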

Signed-off-by: Tina Zhang <[email protected]>
---
drivers/iommu/intel/iommu.c | 69 +++++++++++++++++++++++++++++++++++--
1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 80bd1993861c..d714e780a031 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5248,17 +5248,80 @@ static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
return intel_iommu_iova_to_phys(&dmar_domain->domain, iova);
}

+static void __iommu_calculate_cfg(struct io_pgtable_cfg *cfg)
+{
+ unsigned long fl_sagaw, sl_sagaw, sagaw;
+ int agaw, addr_width;
+
+ fl_sagaw = BIT(2) | (cap_fl5lp_support(cfg->vtd_cfg.cap_reg) ? BIT(3) : 0);
+ sl_sagaw = cap_sagaw(cfg->vtd_cfg.cap_reg);
+ sagaw = fl_sagaw & sl_sagaw;
+
+ for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); agaw >= 0; agaw--) {
+ if (test_bit(agaw, &sagaw))
+ break;
+ }
+
+ addr_width = agaw_to_width(agaw);
+ if (cfg->ias > addr_width)
+ cfg->ias = addr_width;
+ if (cfg->oas != addr_width)
+ cfg->oas = addr_width;
+}
+
static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
- struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+ struct dmar_io_pgtable *pgtable;
+ struct dmar_domain *domain;
+ int adjust_width;
+
+ /* Platform must have nested translation support */
+ if (!ecap_nest(cfg->vtd_cfg.ecap_reg))
+ return NULL;
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
+ domain->nid = NUMA_NO_NODE;
+ domain->use_first_level = true;
+ domain->has_iotlb_device = false;
+ INIT_LIST_HEAD(&domain->devices);
+ spin_lock_init(&domain->lock);
+ xa_init(&domain->iommu_array);
+
+ /* calculate AGAW */
+ __iommu_calculate_cfg(cfg);
+ domain->gaw = cfg->ias;
+ adjust_width = guestwidth_to_adjustwidth(domain->gaw);
+ domain->agaw = width_to_agaw(adjust_width);
+
+ domain->iommu_coherency = ecap_smpwc(cfg->vtd_cfg.ecap_reg);
+ domain->force_snooping = true;
+ domain->iommu_superpage = cap_fl1gp_support(cfg->vtd_cfg.ecap_reg) ? 2 : 1;
+ domain->max_addr = 0;
+
+ cfg->coherent_walk = domain->iommu_coherency;
+
+ pgtable = &domain->dmar_iop;

+ /* always allocate the top pgd */
+ domain->pgd = alloc_pgtable_page(domain->nid, GFP_KERNEL);
+ if (!domain->pgd)
+ goto out_free_domain;
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+
+ cfg->virt.pgd = virt_to_phys(domain->pgd);
+ cfg->tlb = &flush_ops;
pgtable->iop.ops.map_pages = pgtable_map_pages;
pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;

- cfg->tlb = &flush_ops;
-
return &pgtable->iop;
+
+out_free_domain:
+ kfree(domain);
+ return NULL;
}

struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
--
2.39.3

2023-11-06 07:16:15

by Zhang, Tina

Subject: [RFC PATCH 5/5] iommu/virtio-iommu: Support attaching VT-d IO pgtable

Add VT-d IO page table support to the ATTACH_TABLE request.

Signed-off-by: Tina Zhang <[email protected]>
---
drivers/iommu/virtio-iommu.c | 23 +++++++++++++++++++++++
include/uapi/linux/virtio_iommu.h | 26 ++++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index b1ceaac974e2..b02eeb1d27a4 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -991,12 +991,25 @@ static int viommu_attach_pgtable(struct viommu_domain *vdomain,
};

/* TODO: bypass flag? */
+ if (vdomain->bypass == true)
+ return 0;

switch (fmt) {
case VIRT_IO_PGTABLE:
req.format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VIRT);
req.pgd = cpu_to_le64((u64)cfg->virt.pgd);
break;
+ case INTEL_IOMMU: {
+ struct virtio_iommu_req_attach_pgt_vtd *vtd_req =
+ (struct virtio_iommu_req_attach_pgt_vtd *)&req;
+
+ vtd_req->format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VTD);
+ vtd_req->pgd = cpu_to_le64((u64)cfg->virt.pgd);
+ vtd_req->addr_width = cpu_to_le32(cfg->oas);
+ vtd_req->pasid = IOMMU_NO_PASID;
+ break;
+ }
+
default:
return -EINVAL;
};
@@ -1034,6 +1047,16 @@ static int viommu_setup_pgtable(struct viommu_domain *vdomain,
case VIRTIO_IOMMU_FORMAT_PGTF_VIRT:
fmt = VIRT_IO_PGTABLE;
break;
+ case VIRTIO_IOMMU_FORMAT_PGTF_VTD:
+ {
+ struct virtio_iommu_probe_pgt_vtd *vtd_desc =
+ (struct virtio_iommu_probe_pgt_vtd *)desc;
+
+ cfg.vtd_cfg.cap_reg = le64_to_cpu(vtd_desc->cap_reg);
+ cfg.vtd_cfg.ecap_reg = le64_to_cpu(vtd_desc->ecap_reg);
+ fmt = INTEL_IOMMU;
+ break;
+ }
default:
dev_warn(vdev->dev, "unsupported page table format 0x%x\n",
le16_to_cpu(desc->format));
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index 656be1f3d926..17e0d5fcdd54 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -139,6 +139,22 @@ struct virtio_iommu_req_attach_pgt_virt {
struct virtio_iommu_req_tail tail;
};

+/* VT-d I/O Page Table Descriptor */
+struct virtio_iommu_req_attach_pgt_vtd {
+ struct virtio_iommu_req_head head;
+ __le32 domain;
+ __le32 endpoint;
+ __le32 flags;
+ __le16 format;
+ __u8 reserved[2];
+ __le32 pasid;
+ __le64 pgd;
+ __le64 fl_flags;
+ __le32 addr_width;
+ __u8 reserved2[36];
+ struct virtio_iommu_req_tail tail;
+};
+
#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 2)
@@ -224,6 +240,8 @@ struct virtio_iommu_probe_pasid_size {
#define VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3 2
/* Virt I/O page table format */
#define VIRTIO_IOMMU_FORMAT_PGTF_VIRT 3
+/* VT-d I/O page table format */
+#define VIRTIO_IOMMU_FORMAT_PGTF_VTD 4

struct virtio_iommu_probe_table_format {
struct virtio_iommu_probe_property head;
@@ -231,6 +249,14 @@ struct virtio_iommu_probe_table_format {
__u8 reserved[2];
};

+struct virtio_iommu_probe_pgt_vtd {
+ struct virtio_iommu_probe_property head;
+ __le16 format;
+ __u8 reserved[2];
+ __le64 cap_reg;
+ __le64 ecap_reg;
+};
+
struct virtio_iommu_req_probe {
struct virtio_iommu_req_head head;
__le32 endpoint;
--
2.39.3

2023-11-06 19:32:53

by Jason Gunthorpe

Subject: Re: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support

On Mon, Nov 06, 2023 at 02:12:23AM -0500, Tina Zhang wrote:
> Add basic hook up code to implement generic IO page table framework.
>
> Signed-off-by: Tina Zhang <[email protected]>
> ---
> drivers/iommu/intel/Kconfig | 1 +
> drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
> drivers/iommu/intel/iommu.h | 7 +++
> drivers/iommu/io-pgtable.c | 3 ++
> include/linux/io-pgtable.h | 2 +
> 5 files changed, 107 insertions(+)

If this is going to happen, can we also convert vt-d to actually use
the io page table stuff directly and shuffle the code around so it is
structured like the rest of the io page table implementations?

Jason