From: Jacob Pan <[email protected]>
When an I/O page request is processed outside the IOMMU subsystem, the
response can be delayed or lost. Add a tunable setup parameter so that the
user can choose the timeout for the IOMMU to track pending page requests.
This timeout mechanism is a basic safety net which can be implemented in
conjunction with credit-based or device-level page response exception
handling.
Signed-off-by: Jacob Pan <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 8 +++++
drivers/iommu/iommu.c | 29 +++++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..b43f0893d252 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1813,6 +1813,14 @@
1 - Bypass the IOMMU for DMA.
unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
+ iommu.prq_timeout=
+ Timeout in seconds to wait for page response
+ of a pending page request.
+ Format: <integer>
+ Default: 10
+ 0 - no timeout tracking
+ 1 to 100 - allowed range
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 13b301cfb10f..64e87d56f471 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -45,6 +45,19 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
#endif
static bool iommu_dma_strict __read_mostly = true;
+/*
+ * Timeout to wait for page response of a pending page request. This is
+ * intended as a basic safety net in case a pending page request is not
+ * responded to for an exceptionally long time. Device may also implement
+ * its own protection mechanism against this exception.
+ * Stored in jiffies; the allowed range is the equivalent of 1 to 100 seconds.
+ * Default to 10 seconds.
+ * Setting 0 means no timeout tracking.
+ */
+#define IOMMU_PAGE_RESPONSE_MAX_TIMEOUT (HZ * 100)
+#define IOMMU_PAGE_RESPONSE_DEF_TIMEOUT (HZ * 10)
+static unsigned long prq_timeout = IOMMU_PAGE_RESPONSE_DEF_TIMEOUT;
+
struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
@@ -157,6 +170,22 @@ static int __init iommu_dma_setup(char *str)
}
early_param("iommu.strict", iommu_dma_setup);
+static int __init iommu_set_prq_timeout(char *str)
+{
+ unsigned long timeout;
+
+ if (!str)
+ return -EINVAL;
+ timeout = simple_strtoul(str, NULL, 0);
+ timeout = timeout * HZ;
+ if (timeout > IOMMU_PAGE_RESPONSE_MAX_TIMEOUT)
+ return -EINVAL;
+ prq_timeout = timeout;
+
+ return 0;
+}
+early_param("iommu.prq_timeout", iommu_set_prq_timeout);
+
static ssize_t iommu_group_attr_show(struct kobject *kobj,
struct attribute *__attr, char *buf)
{
--
2.20.1
On Sun, 26 May 2019 18:09:40 +0200
Eric Auger <[email protected]> wrote:
> From: Jacob Pan <[email protected]>
>
> When an IO page request is processed outside IOMMU subsystem, response
> can be delayed or lost. Add a tunable setup parameter such that user can
> choose the timeout for IOMMU to track pending page requests.
>
> This timeout mechanism is a basic safety net which can be implemented in
> conjunction with credit based or device level page response exception
> handling.
>
> Signed-off-by: Jacob Pan <[email protected]>
> ---
> .../admin-guide/kernel-parameters.txt | 8 +++++
> drivers/iommu/iommu.c | 29 +++++++++++++++++++
> 2 files changed, 37 insertions(+)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 138f6664b2e2..b43f0893d252 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -1813,6 +1813,14 @@
> 1 - Bypass the IOMMU for DMA.
> unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
>
> + iommu.prq_timeout=
> + Timeout in seconds to wait for page response
> + of a pending page request.
> + Format: <integer>
> + Default: 10
> + 0 - no timeout tracking
> + 1 to 100 - allowed range
> +
> io7= [HW] IO7 for Marvel based alpha systems
> See comment before marvel_specify_io7 in
> arch/alpha/kernel/core_marvel.c.
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 13b301cfb10f..64e87d56f471 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -45,6 +45,19 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
> #endif
> static bool iommu_dma_strict __read_mostly = true;
>
> +/*
> + * Timeout to wait for page response of a pending page request. This is
> + * intended as a basic safety net in case a pending page request is not
> + * responded for an exceptionally long time. Device may also implement
> + * its own protection mechanism against this exception.
> + * Units are in jiffies with a range between 1 - 100 seconds equivalent.
> + * Default to 10 seconds.
> + * Setting 0 means no timeout tracking.
> + */
> +#define IOMMU_PAGE_RESPONSE_MAX_TIMEOUT (HZ * 100)
> +#define IOMMU_PAGE_RESPONSE_DEF_TIMEOUT (HZ * 10)
> +static unsigned long prq_timeout = IOMMU_PAGE_RESPONSE_DEF_TIMEOUT;
> +
> struct iommu_group {
> struct kobject kobj;
> struct kobject *devices_kobj;
> @@ -157,6 +170,22 @@ static int __init iommu_dma_setup(char *str)
> }
> early_param("iommu.strict", iommu_dma_setup);
>
> +static int __init iommu_set_prq_timeout(char *str)
> +{
> + unsigned long timeout;
> +
> + if (!str)
> + return -EINVAL;
> + timeout = simple_strtoul(str, NULL, 0);
> + timeout = timeout * HZ;
> + if (timeout > IOMMU_PAGE_RESPONSE_MAX_TIMEOUT)
> + return -EINVAL;
> + prq_timeout = timeout;
> +
> + return 0;
> +}
> +early_param("iommu.prq_timeout", iommu_set_prq_timeout);
> +
> static ssize_t iommu_group_attr_show(struct kobject *kobj,
> struct attribute *__attr, char *buf)
> {
It doesn't seem to make much sense to include this patch without also
including "iommu: handle page response timeout". Was that one lost?
Dropped? Lives elsewhere? Thanks,
Alex
On 03/06/2019 23:32, Alex Williamson wrote:
> It doesn't seem to make much sense to include this patch without also
> including "iommu: handle page response timeout". Was that one lost?
> Dropped? Lives elsewhere?
The first 7 patches come from my sva/api branch, where I had forgotten
to add the "handle page response timeout" patch. I added it back,
probably after Eric sent this version. But I don't think the patch is
ready for upstream, as we still haven't decided how to proceed with
timeouts. Patches 6 and 7 are for debugging, I don't know if they should
go upstream.
Thanks,
Jean
On Tue, 4 Jun 2019 11:52:18 +0100
Jean-Philippe Brucker <[email protected]> wrote:
> On 03/06/2019 23:32, Alex Williamson wrote:
> > It doesn't seem to make much sense to include this patch without
> > also including "iommu: handle page response timeout". Was that one
> > lost? Dropped? Lives elsewhere?
>
> The first 7 patches come from my sva/api branch, where I had forgotten
> to add the "handle page response timeout" patch. I added it back,
> probably after Eric sent this version. But I don't think the patch is
> ready for upstream, as we still haven't decided how to proceed with
> timeouts. Patches 6 and 7 are for debugging, I don't know if they
> should go upstream.
Yeah, we can wait until we all agree on timeouts. It was introduced as
a basic safeguard against unresponsive guests.