Allow device drivers and VFIO to get notifications on IOMMU translation
faults, and to handle recoverable faults (PCI PRI). These four patches
are relatively mature since they are required by three different series,
and have been under discussion for a while:
* Nested translation support for SMMUv3 [1].
* vSVA for VT-d [2].
* My generic host SVA implementation.
I reworked patch 4 according to previous discussions, and moved the page
response structure to UAPI. For the other patches I only fixed comments
and whitespace. Please have a look and see if it works for you.
[1] [PATCH v7 00/23] SMMUv3 Nested Stage Setup
https://lore.kernel.org/lkml/[email protected]/
[2] [PATCH v3 00/16] Shared virtual address IOMMU and VT-d support
https://lore.kernel.org/lkml/[email protected]/
Jacob Pan (3):
driver core: Add per device iommu param
iommu: Introduce device fault data
iommu: Introduce device fault report API
Jean-Philippe Brucker (1):
iommu: Add recoverable fault reporting
drivers/iommu/iommu.c | 218 +++++++++++++++++++++++++++++++++++++
include/linux/device.h | 3 +
include/linux/iommu.h | 91 ++++++++++++++++
include/uapi/linux/iommu.h | 152 ++++++++++++++++++++++++++
4 files changed, 464 insertions(+)
create mode 100644 include/uapi/linux/iommu.h
--
2.21.0
From: Jacob Pan <[email protected]>
Device faults detected by IOMMU can be reported outside the IOMMU
subsystem for further processing. This patch introduces
a generic device fault data structure.
The fault can be either an unrecoverable fault or a page request,
also referred to as a recoverable fault.
We only care about non-internal faults that are likely to be reported
to an external subsystem.
Signed-off-by: Jacob Pan <[email protected]>
Signed-off-by: Jean-Philippe Brucker <[email protected]>
Signed-off-by: Liu, Yi L <[email protected]>
Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Eric Auger <[email protected]>
---
include/linux/iommu.h | 43 ++++++++++++++
include/uapi/linux/iommu.h | 118 +++++++++++++++++++++++++++++++++++++
2 files changed, 161 insertions(+)
create mode 100644 include/uapi/linux/iommu.h
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a815cf6f6f47..d442f5f3fa93 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -25,6 +25,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/of.h>
+#include <uapi/linux/iommu.h>
#define IOMMU_READ (1 << 0)
#define IOMMU_WRITE (1 << 1)
@@ -49,6 +50,7 @@ struct device;
struct iommu_domain;
struct notifier_block;
struct iommu_sva;
+struct iommu_fault_event;
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
@@ -58,6 +60,7 @@ typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
void *);
+typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped */
@@ -301,6 +304,45 @@ struct iommu_device {
struct device *dev;
};
+/**
+ * struct iommu_fault_event - Generic fault event
+ *
+ * Can represent recoverable faults such as page requests or
+ * unrecoverable faults such as DMA or IRQ remapping faults.
+ *
+ * @fault: fault descriptor
+ * @iommu_private: used by the IOMMU driver for storing fault-specific
+ * data. Users should not modify this field before
+ * sending the fault response.
+ */
+struct iommu_fault_event {
+ struct iommu_fault fault;
+ u64 iommu_private;
+};
+
+/**
+ * struct iommu_fault_param - per-device IOMMU fault data
+ * @handler: Callback function to handle IOMMU faults at device level
+ * @data: handler private data
+ */
+struct iommu_fault_param {
+ iommu_dev_fault_handler_t handler;
+ void *data;
+};
+
+/**
+ * struct iommu_param - collection of per-device IOMMU data
+ *
+ * @fault_param: IOMMU detected device fault reporting data
+ *
+ * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
+ * struct iommu_group *iommu_group;
+ * struct iommu_fwspec *iommu_fwspec;
+ */
+struct iommu_param {
+ struct iommu_fault_param *fault_param;
+};
+
int iommu_device_register(struct iommu_device *iommu);
void iommu_device_unregister(struct iommu_device *iommu);
int iommu_device_sysfs_add(struct iommu_device *iommu,
@@ -504,6 +546,7 @@ struct iommu_ops {};
struct iommu_group {};
struct iommu_fwspec {};
struct iommu_device {};
+struct iommu_fault_param {};
static inline bool iommu_present(struct bus_type *bus)
{
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
new file mode 100644
index 000000000000..796402174d6c
--- /dev/null
+++ b/include/uapi/linux/iommu.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOMMU user API definitions
+ */
+
+#ifndef _UAPI_IOMMU_H
+#define _UAPI_IOMMU_H
+
+#include <linux/types.h>
+
+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded to include IRQ remapping faults */
+enum iommu_fault_type {
+ IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
+ IOMMU_FAULT_PAGE_REQ, /* page request fault */
+};
+
+enum iommu_fault_reason {
+ IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+ /* Could not access the PASID table (fetch caused external abort) */
+ IOMMU_FAULT_REASON_PASID_FETCH,
+
+ /* PASID entry is invalid or has configuration errors */
+ IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+ /*
+ * PASID is out of range (e.g. exceeds the maximum PASID
+ * supported by the IOMMU) or disabled.
+ */
+ IOMMU_FAULT_REASON_PASID_INVALID,
+
+ /*
+ * An external abort occurred fetching (or updating) a translation
+ * table descriptor
+ */
+ IOMMU_FAULT_REASON_WALK_EABT,
+
+ /*
+ * Could not access the page table entry (Bad address),
+ * actual translation fault
+ */
+ IOMMU_FAULT_REASON_PTE_FETCH,
+
+ /* Protection flag check failed */
+ IOMMU_FAULT_REASON_PERMISSION,
+
+ /* access flag check failed */
+ IOMMU_FAULT_REASON_ACCESS,
+
+ /* Output address of a translation stage caused Address Size fault */
+ IOMMU_FAULT_REASON_OOR_ADDRESS,
+};
+
+/**
+ * struct iommu_fault_unrecoverable - Unrecoverable fault data
+ * @reason: reason of the fault, from &enum iommu_fault_reason
+ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
+ * @pasid: Process Address Space ID
+ * @perm: permissions requested by the incoming transaction
+ * (IOMMU_FAULT_PERM_* values)
+ * @addr: offending page address
+ * @fetch_addr: address that caused a fetch abort, if any
+ */
+struct iommu_fault_unrecoverable {
+ __u32 reason;
+#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
+#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
+ __u32 flags;
+ __u32 pasid;
+ __u32 perm;
+ __u64 addr;
+ __u64 fetch_addr;
+};
+
+/**
+ * struct iommu_fault_page_request - Page Request data
+ * @flags: encodes whether the corresponding fields are valid and whether this
+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values)
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_fault_page_request {
+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 perm;
+ __u64 addr;
+ __u64 private_data[2];
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ * @type: fault type from &enum iommu_fault_type
+ * @padding: reserved for future use (should be zero)
+ * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
+ */
+struct iommu_fault {
+ __u32 type;
+ __u32 padding;
+ union {
+ struct iommu_fault_unrecoverable event;
+ struct iommu_fault_page_request prm;
+ };
+};
+#endif /* _UAPI_IOMMU_H */
--
2.21.0
On 23/05/2019 19:06, Jean-Philippe Brucker wrote:
> From: Jacob Pan <[email protected]>
>
> Device faults detected by IOMMU can be reported outside the IOMMU
> subsystem for further processing. This patch introduces
> a generic device fault data structure.
>
> The fault can be either an unrecoverable fault or a page request,
> also referred to as a recoverable fault.
>
> We only care about non internal faults that are likely to be reported
> to an external subsystem.
>
> Signed-off-by: Jacob Pan <[email protected]>
> Signed-off-by: Jean-Philippe Brucker <[email protected]>
> Signed-off-by: Liu, Yi L <[email protected]>
> Signed-off-by: Ashok Raj <[email protected]>
> Signed-off-by: Eric Auger <[email protected]>
> ---
> include/linux/iommu.h | 43 ++++++++++++++
> include/uapi/linux/iommu.h | 118 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 161 insertions(+)
> create mode 100644 include/uapi/linux/iommu.h
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index a815cf6f6f47..d442f5f3fa93 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
> #include <linux/errno.h>
> #include <linux/err.h>
> #include <linux/of.h>
> +#include <uapi/linux/iommu.h>
>
> #define IOMMU_READ (1 << 0)
> #define IOMMU_WRITE (1 << 1)
> @@ -49,6 +50,7 @@ struct device;
> struct iommu_domain;
> struct notifier_block;
> struct iommu_sva;
> +struct iommu_fault_event;
>
> /* iommu fault flags */
> #define IOMMU_FAULT_READ 0x0
> @@ -58,6 +60,7 @@ typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
> struct device *, unsigned long, int, void *);
> typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
> void *);
> +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
>
> struct iommu_domain_geometry {
> dma_addr_t aperture_start; /* First address that can be mapped */
> @@ -301,6 +304,45 @@ struct iommu_device {
> struct device *dev;
> };
>
> +/**
> + * struct iommu_fault_event - Generic fault event
> + *
> + * Can represent recoverable faults such as a page requests or
> + * unrecoverable faults such as DMA or IRQ remapping faults.
> + *
> + * @fault: fault descriptor
> + * @iommu_private: used by the IOMMU driver for storing fault-specific
> + * data. Users should not modify this field before
> + * sending the fault response.
Sorry if I'm a bit late to the party, but given that description, if
users aren't allowed to touch this then why expose it to them at all?
I.e. why not have iommu_report_device_fault() pass just the fault itself
to the fault handler:
ret = fparam->handler(&evt->fault, fparam->data);
and let the IOMMU core/drivers decapsulate it again later if need be.
AFAICS drivers could also just embed the entire generic event in their
own private structure anyway, just as we do for domains.
Robin.
> + */
> +struct iommu_fault_event {
> + struct iommu_fault fault;
> + u64 iommu_private;
> +};
> +
> +/**
> + * struct iommu_fault_param - per-device IOMMU fault data
> + * @handler: Callback function to handle IOMMU faults at device level
> + * @data: handler private data
> + */
> +struct iommu_fault_param {
> + iommu_dev_fault_handler_t handler;
> + void *data;
> +};
> +
> +/**
> + * struct iommu_param - collection of per-device IOMMU data
> + *
> + * @fault_param: IOMMU detected device fault reporting data
> + *
> + * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
> + * struct iommu_group *iommu_group;
> + * struct iommu_fwspec *iommu_fwspec;
> + */
> +struct iommu_param {
> + struct iommu_fault_param *fault_param;
> +};
> +
> int iommu_device_register(struct iommu_device *iommu);
> void iommu_device_unregister(struct iommu_device *iommu);
> int iommu_device_sysfs_add(struct iommu_device *iommu,
> @@ -504,6 +546,7 @@ struct iommu_ops {};
> struct iommu_group {};
> struct iommu_fwspec {};
> struct iommu_device {};
> +struct iommu_fault_param {};
>
> static inline bool iommu_present(struct bus_type *bus)
> {
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index 000000000000..796402174d6c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,118 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + */
> +
> +#ifndef _UAPI_IOMMU_H
> +#define _UAPI_IOMMU_H
> +
> +#include <linux/types.h>
> +
> +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
> +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
> +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
> +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
> +
> +/* Generic fault types, can be expanded IRQ remapping fault */
> +enum iommu_fault_type {
> + IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
> + IOMMU_FAULT_PAGE_REQ, /* page request fault */
> +};
> +
> +enum iommu_fault_reason {
> + IOMMU_FAULT_REASON_UNKNOWN = 0,
> +
> + /* Could not access the PASID table (fetch caused external abort) */
> + IOMMU_FAULT_REASON_PASID_FETCH,
> +
> + /* PASID entry is invalid or has configuration errors */
> + IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
> +
> + /*
> + * PASID is out of range (e.g. exceeds the maximum PASID
> + * supported by the IOMMU) or disabled.
> + */
> + IOMMU_FAULT_REASON_PASID_INVALID,
> +
> + /*
> + * An external abort occurred fetching (or updating) a translation
> + * table descriptor
> + */
> + IOMMU_FAULT_REASON_WALK_EABT,
> +
> + /*
> + * Could not access the page table entry (Bad address),
> + * actual translation fault
> + */
> + IOMMU_FAULT_REASON_PTE_FETCH,
> +
> + /* Protection flag check failed */
> + IOMMU_FAULT_REASON_PERMISSION,
> +
> + /* access flag check failed */
> + IOMMU_FAULT_REASON_ACCESS,
> +
> + /* Output address of a translation stage caused Address Size fault */
> + IOMMU_FAULT_REASON_OOR_ADDRESS,
> +};
> +
> +/**
> + * struct iommu_fault_unrecoverable - Unrecoverable fault data
> + * @reason: reason of the fault, from &enum iommu_fault_reason
> + * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
> + * @pasid: Process Address Space ID
> + * @perm: requested permission access using by the incoming transaction
> + * (IOMMU_FAULT_PERM_* values)
> + * @addr: offending page address
> + * @fetch_addr: address that caused a fetch abort, if any
> + */
> +struct iommu_fault_unrecoverable {
> + __u32 reason;
> +#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
> +#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
> +#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
> + __u32 flags;
> + __u32 pasid;
> + __u32 perm;
> + __u64 addr;
> + __u64 fetch_addr;
> +};
> +
> +/**
> + * struct iommu_fault_page_request - Page Request data
> + * @flags: encodes whether the corresponding fields are valid and whether this
> + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values)
> + * @pasid: Process Address Space ID
> + * @grpid: Page Request Group Index
> + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
> + * @addr: page address
> + * @private_data: device-specific private information
> + */
> +struct iommu_fault_page_request {
> +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
> +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
> +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
> + __u32 flags;
> + __u32 pasid;
> + __u32 grpid;
> + __u32 perm;
> + __u64 addr;
> + __u64 private_data[2];
> +};
> +
> +/**
> + * struct iommu_fault - Generic fault data
> + * @type: fault type from &enum iommu_fault_type
> + * @padding: reserved for future use (should be zero)
> + * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
> + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
> + */
> +struct iommu_fault {
> + __u32 type;
> + __u32 padding;
> + union {
> + struct iommu_fault_unrecoverable event;
> + struct iommu_fault_page_request prm;
> + };
> +};
> +#endif /* _UAPI_IOMMU_H */
>
On Thu, 23 May 2019 19:43:46 +0100
Robin Murphy <[email protected]> wrote:
> On 23/05/2019 19:06, Jean-Philippe Brucker wrote:
> > From: Jacob Pan <[email protected]>
> >
> > Device faults detected by IOMMU can be reported outside the IOMMU
> > subsystem for further processing. This patch introduces
> > a generic device fault data structure.
> >
> > The fault can be either an unrecoverable fault or a page request,
> > also referred to as a recoverable fault.
> >
> > We only care about non internal faults that are likely to be
> > reported to an external subsystem.
> >
> > Signed-off-by: Jacob Pan <[email protected]>
> > Signed-off-by: Jean-Philippe Brucker <[email protected]>
> > Signed-off-by: Liu, Yi L <[email protected]>
> > Signed-off-by: Ashok Raj <[email protected]>
> > Signed-off-by: Eric Auger <[email protected]>
> > ---
> > include/linux/iommu.h | 43 ++++++++++++++
> > include/uapi/linux/iommu.h | 118
> > +++++++++++++++++++++++++++++++++++++ 2 files changed, 161
> > insertions(+) create mode 100644 include/uapi/linux/iommu.h
> >
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index a815cf6f6f47..d442f5f3fa93 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -25,6 +25,7 @@
> > #include <linux/errno.h>
> > #include <linux/err.h>
> > #include <linux/of.h>
> > +#include <uapi/linux/iommu.h>
> >
> > #define IOMMU_READ (1 << 0)
> > #define IOMMU_WRITE (1 << 1)
> > @@ -49,6 +50,7 @@ struct device;
> > struct iommu_domain;
> > struct notifier_block;
> > struct iommu_sva;
> > +struct iommu_fault_event;
> >
> > /* iommu fault flags */
> > #define IOMMU_FAULT_READ 0x0
> > @@ -58,6 +60,7 @@ typedef int (*iommu_fault_handler_t)(struct
> > iommu_domain *, struct device *, unsigned long, int, void *);
> > typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct
> > iommu_sva *, void *);
> > +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event
> > *, void *);
> > struct iommu_domain_geometry {
> > dma_addr_t aperture_start; /* First address that can be
> > mapped */ @@ -301,6 +304,45 @@ struct iommu_device {
> > struct device *dev;
> > };
> >
> > +/**
> > + * struct iommu_fault_event - Generic fault event
> > + *
> > + * Can represent recoverable faults such as a page requests or
> > + * unrecoverable faults such as DMA or IRQ remapping faults.
> > + *
> > + * @fault: fault descriptor
> > + * @iommu_private: used by the IOMMU driver for storing
> > fault-specific
> > + * data. Users should not modify this field before
> > + * sending the fault response.
>
> Sorry if I'm a bit late to the party, but given that description, if
> users aren't allowed to touch this then why expose it to them at all?
> I.e. why not have iommu_report_device_fault() pass just the fault
> itself to the fault handler:
>
> ret = fparam->handler(&evt->fault, fparam->data);
>
> and let the IOMMU core/drivers decapsulate it again later if need be.
> AFAICS drivers could also just embed the entire generic event in
> their own private structure anyway, just as we do for domains.
>
I can't remember all the discussion history but I think iommu_private
is used similarly to the page request private data (device private). We
need to inject the data to the guest and the guest will send the
unmodified data back along with response. The private data can be used
to tag internal device/iommu context.
I think we can do it the way you said, by keeping them within the iommu
core and recovering them based on the response, but that would require
tracking each fault report, right?
If we pass on the private data, we only need to check that the response
belongs to the device, not that it exactly matches a specific fault, since
the damage is contained in the assigned device. In the case of injecting a
fault into the guest, the response will come asynchronously after the
handler completes.
> Robin.
>
> > + */
> > +struct iommu_fault_event {
> > + struct iommu_fault fault;
> > + u64 iommu_private;
> > +};
> > +
> > +/**
> > + * struct iommu_fault_param - per-device IOMMU fault data
> > + * @handler: Callback function to handle IOMMU faults at device
> > level
> > + * @data: handler private data
> > + */
> > +struct iommu_fault_param {
> > + iommu_dev_fault_handler_t handler;
> > + void *data;
> > +};
> > +
> > +/**
> > + * struct iommu_param - collection of per-device IOMMU data
> > + *
> > + * @fault_param: IOMMU detected device fault reporting data
> > + *
> > + * TODO: migrate other per device data pointers under
> > iommu_dev_data, e.g.
> > + * struct iommu_group *iommu_group;
> > + * struct iommu_fwspec *iommu_fwspec;
> > + */
> > +struct iommu_param {
> > + struct iommu_fault_param *fault_param;
> > +};
> > +
> > int iommu_device_register(struct iommu_device *iommu);
> > void iommu_device_unregister(struct iommu_device *iommu);
> > int iommu_device_sysfs_add(struct iommu_device *iommu,
> > @@ -504,6 +546,7 @@ struct iommu_ops {};
> > struct iommu_group {};
> > struct iommu_fwspec {};
> > struct iommu_device {};
> > +struct iommu_fault_param {};
> >
> > static inline bool iommu_present(struct bus_type *bus)
> > {
> > diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> > new file mode 100644
> > index 000000000000..796402174d6c
> > --- /dev/null
> > +++ b/include/uapi/linux/iommu.h
> > @@ -0,0 +1,118 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +/*
> > + * IOMMU user API definitions
> > + */
> > +
> > +#ifndef _UAPI_IOMMU_H
> > +#define _UAPI_IOMMU_H
> > +
> > +#include <linux/types.h>
> > +
> > +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
> > +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
> > +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
> > +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
> > +
> > +/* Generic fault types, can be expanded IRQ remapping fault */
> > +enum iommu_fault_type {
> > + IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault
> > */
> > + IOMMU_FAULT_PAGE_REQ, /* page request fault
> > */ +};
> > +
> > +enum iommu_fault_reason {
> > + IOMMU_FAULT_REASON_UNKNOWN = 0,
> > +
> > + /* Could not access the PASID table (fetch caused external
> > abort) */
> > + IOMMU_FAULT_REASON_PASID_FETCH,
> > +
> > + /* PASID entry is invalid or has configuration errors */
> > + IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
> > +
> > + /*
> > + * PASID is out of range (e.g. exceeds the maximum PASID
> > + * supported by the IOMMU) or disabled.
> > + */
> > + IOMMU_FAULT_REASON_PASID_INVALID,
> > +
> > + /*
> > + * An external abort occurred fetching (or updating) a
> > translation
> > + * table descriptor
> > + */
> > + IOMMU_FAULT_REASON_WALK_EABT,
> > +
> > + /*
> > + * Could not access the page table entry (Bad address),
> > + * actual translation fault
> > + */
> > + IOMMU_FAULT_REASON_PTE_FETCH,
> > +
> > + /* Protection flag check failed */
> > + IOMMU_FAULT_REASON_PERMISSION,
> > +
> > + /* access flag check failed */
> > + IOMMU_FAULT_REASON_ACCESS,
> > +
> > + /* Output address of a translation stage caused Address
> > Size fault */
> > + IOMMU_FAULT_REASON_OOR_ADDRESS,
> > +};
> > +
> > +/**
> > + * struct iommu_fault_unrecoverable - Unrecoverable fault data
> > + * @reason: reason of the fault, from &enum iommu_fault_reason
> > + * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
> > + * @pasid: Process Address Space ID
> > + * @perm: requested permission access using by the incoming
> > transaction
> > + * (IOMMU_FAULT_PERM_* values)
> > + * @addr: offending page address
> > + * @fetch_addr: address that caused a fetch abort, if any
> > + */
> > +struct iommu_fault_unrecoverable {
> > + __u32 reason;
> > +#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
> > +#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
> > +#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
> > + __u32 flags;
> > + __u32 pasid;
> > + __u32 perm;
> > + __u64 addr;
> > + __u64 fetch_addr;
> > +};
> > +
> > +/**
> > + * struct iommu_fault_page_request - Page Request data
> > + * @flags: encodes whether the corresponding fields are valid and
> > whether this
> > + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_*
> > values)
> > + * @pasid: Process Address Space ID
> > + * @grpid: Page Request Group Index
> > + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
> > + * @addr: page address
> > + * @private_data: device-specific private information
> > + */
> > +struct iommu_fault_page_request {
> > +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
> > +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
> > +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
> > + __u32 flags;
> > + __u32 pasid;
> > + __u32 grpid;
> > + __u32 perm;
> > + __u64 addr;
> > + __u64 private_data[2];
> > +};
> > +
> > +/**
> > + * struct iommu_fault - Generic fault data
> > + * @type: fault type from &enum iommu_fault_type
> > + * @padding: reserved for future use (should be zero)
> > + * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
> > + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
> > + */
> > +struct iommu_fault {
> > + __u32 type;
> > + __u32 padding;
> > + union {
> > + struct iommu_fault_unrecoverable event;
> > + struct iommu_fault_page_request prm;
> > + };
> > +};
> > +#endif /* _UAPI_IOMMU_H */
> >
> _______________________________________________
> iommu mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Jacob Pan]
On 24/05/2019 14:49, Jacob Pan wrote:
> On Thu, 23 May 2019 19:43:46 +0100
> Robin Murphy <[email protected]> wrote:
>>> +/**
>>> + * struct iommu_fault_event - Generic fault event
>>> + *
>>> + * Can represent recoverable faults such as a page requests or
>>> + * unrecoverable faults such as DMA or IRQ remapping faults.
>>> + *
>>> + * @fault: fault descriptor
>>> + * @iommu_private: used by the IOMMU driver for storing
>>> fault-specific
>>> + * data. Users should not modify this field before
>>> + * sending the fault response.
>>
>> Sorry if I'm a bit late to the party, but given that description, if
>> users aren't allowed to touch this then why expose it to them at all?
>> I.e. why not have iommu_report_device_fault() pass just the fault
>> itself to the fault handler:
>>
>> ret = fparam->handler(&evt->fault, fparam->data);
>>
>> and let the IOMMU core/drivers decapsulate it again later if need be.
>> AFAICS drivers could also just embed the entire generic event in
>> their own private structure anyway, just as we do for domains.
>>
> I can't remember all the discussion history but I think iommu_private
> is used similarly to the page request private data (device private).
Hm yes, we already have iommu_fault_page_request::private_data for that.
I think I used to stash flags in iommu_private (is_stall and
needs_pasid), so that the SMMUv3 driver doesn't need to go fetch them
from the device structure, but I removed them. If VT-d doesn't need
iommu_private either, maybe we can remove it entirely?
In any case I agree that device drivers should only need to know about
evt->fault.
> We
> need to inject the data to the guest and the guest will send the
> unmodified data back along with response.
By the way, does private_data need to go back through the
iommu_page_response() path? The current series doesn't do that.
> The private data can be used
> to tag internal device/iommu context.
> I think we can do the way you said by keeping them within iommu core
> and recover it based on the response but that would require tracking
> each fault report, right?
That's already the case: we decided in thread [1] to track recoverable
faults in the IOMMU core, in order to check that the response is sane
and to set a quota and/or timeout. (I didn't include your timeout
patches here because I think they need a little more work. They are on
my sva/api branch.)
I already dropped iommu_private from the iommu_page_response structure.
In patch 4 iommu_page_response() retrieves the fault event and passes the
corresponding iommu_private back to the IOMMU driver.
[1] https://lore.kernel.org/lkml/20171206112521.1edf8e9b@jacob-builder/
Thanks,
Jean
>
> If we pass on the private data, we only need to check if the response
> belong to the device but not exact match of a specific fault since the
> damage is contained in the assigned device. In case of injection
> fault into the guest, the response will come asynchronously after the
> handler completes.
On Fri, 24 May 2019 17:14:30 +0100
Jean-Philippe Brucker <[email protected]> wrote:
> On 24/05/2019 14:49, Jacob Pan wrote:
> > On Thu, 23 May 2019 19:43:46 +0100
> > Robin Murphy <[email protected]> wrote:
> >>> +/**
> >>> + * struct iommu_fault_event - Generic fault event
> >>> + *
> >>> + * Can represent recoverable faults such as a page requests or
> >>> + * unrecoverable faults such as DMA or IRQ remapping faults.
> >>> + *
> >>> + * @fault: fault descriptor
> >>> + * @iommu_private: used by the IOMMU driver for storing
> >>> fault-specific
> >>> + * data. Users should not modify this field
> >>> before
> >>> + * sending the fault response.
> >>
> >> Sorry if I'm a bit late to the party, but given that description,
> >> if users aren't allowed to touch this then why expose it to them
> >> at all? I.e. why not have iommu_report_device_fault() pass just
> >> the fault itself to the fault handler:
> >>
> >> ret = fparam->handler(&evt->fault, fparam->data);
> >>
> >> and let the IOMMU core/drivers decapsulate it again later if need
> >> be. AFAICS drivers could also just embed the entire generic event
> >> in their own private structure anyway, just as we do for domains.
> >>
> > I can't remember all the discussion history but I think
> > iommu_private is used similarly to the page request private data
> > (device private).
>
> Hm yes, we already have iommu_fault_page_request::private_data for
> that. I think I used to stash flags in iommu_private (is_stall and
> needs_pasid), so that the SMMUv3 driver doesn't need to go fetch them
> from the device structure, but I removed them. If VT-d doesn't need
> iommu_private either, maybe we can remove it entirely?
>
yes, vt-d does not use or plan to use it.
> In any case I agree that device drivers should only need to know about
> evt->fault.
>
> > We
> > need to inject the data to the guest and the guest will send the
> > unmodified data back along with response.
>
> By the way, does private_data need to go back through the
> iommu_page_response() path? The current series doesn't do that.
>
yes, private_data needs to go back in the page_response path. Perhaps just
send the response with the matching prm?
-ret = domain->ops->page_response(dev, msg, evt->iommu_private);
+ret = domain->ops->page_response(dev, msg, prm);
> > The private data can be used
> > to tag internal device/iommu context.
>
> > I think we can do the way you said by keeping them within iommu core
> > and recover it based on the response but that would require tracking
> > each fault report, right?
>
> That's already the case: we decided in thread [1] to track recoverable
> faults in the IOMMU core, in order to check that the response is sane
> and to set a quota and/or timeout. (I didn't include your timeout
> patches here because I think they need a little more work. They are on
> my sva/api branch.)
>
> I already dropped iommu_private from the iommu_page_response
> structure. In patch 4 iommu_page_response() retrieves the fault event
> and pass the corresponding iommu_private back to the IOMMU driver.
>
> [1]
> https://lore.kernel.org/lkml/20171206112521.1edf8e9b@jacob-builder/
>
great, as planned :) I lost track of where the discussion ended and
haven't read the latest code. Thanks
> Thanks,
> Jean
>
> >
> > If we pass on the private data, we only need to check if the
> > response belong to the device but not exact match of a specific
> > fault since the damage is contained in the assigned device. In case
> > of injection fault into the guest, the response will come
> > asynchronously after the handler completes.
[Jacob Pan]