Document the "iova-best-fit" device-tree property, which indicates that
the iommu master is memory-constrained and that the system should spend
more effort when allocating IOVAs to avoid leaving holes/gaps in the
IOVA space. This improves memory utilization and helps with memory
fragmentation issues in some cases, but allocating an IOVA may take
longer than with the default "first-fit" algorithm.
Signed-off-by: Georgi Djakov <[email protected]>
---
Documentation/devicetree/bindings/iommu/iommu.txt | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt
index 26ba9e530f13..ca1b4813c5bf 100644
--- a/Documentation/devicetree/bindings/iommu/iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/iommu.txt
@@ -88,6 +88,10 @@ prevent any driver from properly setting up the translations.
Optional properties:
--------------------
+- iova-best-fit: When present, the best-fit algorithm will be used, instead
+ of first-fit. This reduces memory fragmentation when allocating IOVAs in
+ some cases, but may also increase the time it takes to allocate an IOVA.
+
- pasid-num-bits: Some masters support multiple address spaces for DMA, by
tagging DMA transactions with an address space identifier. By default,
this is 0, which means that the device only has one address space.
In some multimedia use-cases that mix allocations of buffers smaller
than PAGE_SIZE with ~400MB buffers, the IOVA space becomes fragmented
and allocation failures are observed.
In these scenarios, there are hundreds of randomly distributed,
non-power-of-two allocations, which in some cases leave behind holes of
up to 100MB in the IOVA space.
To reduce memory fragmentation in these use-cases, introduce the
"best-fit" algorithm, which individual devices can enable with a
device-tree property. If the DT property is not present, the default
"first-fit" algorithm is used.
Example usage:

	device {
		compatible = "example-device";
		iommus = <&smmu 0x21 0>;
		iova-best-fit;
	};
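
For illustration only (not part of the patch), here is a toy, standalone
sketch contrasting the two strategies over a made-up list of free gaps:
first-fit takes the first gap that is large enough, while best-fit takes
the smallest gap that still fits, preserving the large gaps for later
large allocations.

/* Toy illustration, not part of the patch: the gap sizes are made up.
 * first_fit() returns the first free gap that can hold 'size' pages;
 * best_fit() returns the smallest free gap that can still hold it.
 */
#include <stdio.h>

static const unsigned long gaps[] = { 300, 40, 1000, 64, 512 };
#define NUM_GAPS (sizeof(gaps) / sizeof(gaps[0]))

static int first_fit(unsigned long size)
{
	for (unsigned int i = 0; i < NUM_GAPS; i++)
		if (gaps[i] >= size)
			return i;
	return -1;
}

static int best_fit(unsigned long size)
{
	int best = -1;

	for (unsigned int i = 0; i < NUM_GAPS; i++)
		if (gaps[i] >= size && (best < 0 || gaps[i] < gaps[best]))
			best = i;
	return best;
}

int main(void)
{
	unsigned long size = 60;	/* a small allocation, in pages */

	/* first-fit picks gap 0 (300 pages) and leaves a 240-page hole;
	 * best-fit picks gap 3 (64 pages) and leaves only a 4-page hole,
	 * keeping the 300- and 1000-page gaps intact for larger requests.
	 */
	printf("first-fit: gap %d, best-fit: gap %d\n",
	       first_fit(size), best_fit(size));
	return 0;
}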
Signed-off-by: Georgi Djakov <[email protected]>
---
RFC v1: https://lore.kernel.org/r/[email protected]/
- Use a DT property instead of an API to enable the best-fit algorithm for the device
- Rename __alloc_and_insert_iova_range() to __alloc_and_insert_iova_first_fit()
drivers/iommu/dma-iommu.c | 5 +++
drivers/iommu/iova.c | 79 ++++++++++++++++++++++++++++++++++++---
include/linux/iova.h | 1 +
3 files changed, 80 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f798c44e0903..1be25e54d050 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -582,6 +582,11 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (ret)
goto done_unlock;
+ if (dev && dev->of_node) {
+ if (of_property_read_bool(dev->of_node, "iova-best-fit"))
+ iovad->best_fit = true;
+ }
+
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
domain->type = IOMMU_DOMAIN_DMA;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index a44ad92fc5eb..34ad05a4095f 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -64,6 +64,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
iovad->cached_node = &iovad->anchor.node;
iovad->cached32_node = &iovad->anchor.node;
iovad->granule = granule;
+ iovad->best_fit = false;
iovad->start_pfn = start_pfn;
iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
iovad->max32_alloc_size = iovad->dma_32bit_pfn;
@@ -175,9 +176,10 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
rb_insert_color(&iova->node, root);
}
-static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
- unsigned long size, unsigned long limit_pfn,
- struct iova *new, bool size_aligned)
+static int __alloc_and_insert_iova_first_fit(struct iova_domain *iovad,
+ unsigned long size,
+ unsigned long limit_pfn,
+ struct iova *new, bool size_aligned)
{
struct rb_node *curr, *prev;
struct iova *curr_iova;
@@ -236,6 +238,68 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
return -ENOMEM;
}
+static int __alloc_and_insert_iova_best_fit(struct iova_domain *iovad,
+ unsigned long size,
+ unsigned long limit_pfn,
+ struct iova *new, bool size_aligned)
+{
+ struct rb_node *curr, *prev;
+ struct iova *curr_iova, *prev_iova;
+ unsigned long flags;
+ unsigned long align_mask = ~0UL;
+ struct rb_node *candidate_rb_parent;
+ unsigned long new_pfn, candidate_pfn = ~0UL;
+ unsigned long gap, candidate_gap = ~0UL;
+
+ if (size_aligned)
+ align_mask <<= fls_long(size - 1);
+
+ /* Walk the tree backwards */
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ curr = &iovad->anchor.node;
+ prev = rb_prev(curr);
+ for (; prev; curr = prev, prev = rb_prev(curr)) {
+ curr_iova = rb_entry(curr, struct iova, node);
+ prev_iova = rb_entry(prev, struct iova, node);
+
+ limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
+ new_pfn = (limit_pfn - size) & align_mask;
+ gap = curr_iova->pfn_lo - prev_iova->pfn_hi - 1;
+ if (limit_pfn >= size && new_pfn > prev_iova->pfn_hi && gap < candidate_gap) {
+ candidate_gap = gap;
+ candidate_pfn = new_pfn;
+ candidate_rb_parent = curr;
+ if (gap == size)
+ goto insert;
+ }
+ }
+
+ curr_iova = rb_entry(curr, struct iova, node);
+ limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
+ new_pfn = (limit_pfn - size) & align_mask;
+ gap = curr_iova->pfn_lo - iovad->start_pfn;
+ if (limit_pfn >= size && new_pfn >= iovad->start_pfn && gap < candidate_gap) {
+ candidate_gap = gap;
+ candidate_pfn = new_pfn;
+ candidate_rb_parent = curr;
+ }
+
+insert:
+ if (candidate_pfn == ~0UL) {
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* pfn_lo will point to size aligned address if size_aligned is set */
+ new->pfn_lo = candidate_pfn;
+ new->pfn_hi = new->pfn_lo + size - 1;
+
+ /* candidate_rb_parent is a valid place to start the insertion */
+ iova_insert_rbtree(&iovad->rbroot, new, candidate_rb_parent);
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return 0;
+}
+
static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);
@@ -322,8 +386,13 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
if (!new_iova)
return NULL;
- ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
- new_iova, size_aligned);
+ if (iovad->best_fit) {
+ ret = __alloc_and_insert_iova_best_fit(iovad, size, limit_pfn + 1,
+ new_iova, size_aligned);
+ } else {
+ ret = __alloc_and_insert_iova_first_fit(iovad, size, limit_pfn + 1,
+ new_iova, size_aligned);
+ }
if (ret) {
free_iova_mem(new_iova);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 83c00fac2acb..c0c77f676e17 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -38,6 +38,7 @@ struct iova_domain {
struct iova_rcache *rcaches;
struct hlist_node cpuhp_dead;
+ bool best_fit; /* best-fit algorithm is enabled */
};
static inline unsigned long iova_size(struct iova *iova)
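
For context, a rough, illustrative sketch (not part of the patch; the
driver code is hypothetical) of the driver-side path that exercises this
allocator: with an IOMMU handling DMA translation, dma_map_single() ends
up in iommu_dma_map_page() -> iommu_dma_alloc_iova() -> alloc_iova_fast()
-> alloc_iova(), which is where the new best_fit flag selects the
allocation strategy.

#include <linux/dma-mapping.h>

/* Hypothetical driver snippet: map a CPU buffer for device DMA. If the
 * device sits behind an IOMMU and its DT node has "iova-best-fit", the
 * IOVA backing 'dma' is now chosen with the best-fit algorithm.
 */
static int example_map(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma;

	dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... program 'dma' into the hardware, run the transfer ... */

	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
	return 0;
}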
On Mon, Dec 12, 2022 at 01:51:37PM -0800, Georgi Djakov wrote:
> Document the "iova-best-fit" device-tree property, which indicates that
> the iommu master is memory-constrained and that the system should spend
> more effort when allocating IOVAs to avoid leaving holes/gaps in the
> IOVA space. This improves memory utilization and helps with memory
> fragmentation issues in some cases, but allocating an IOVA may take
> longer than with the default "first-fit" algorithm.
>
> Signed-off-by: Georgi Djakov <[email protected]>
> ---
> Documentation/devicetree/bindings/iommu/iommu.txt | 4 ++++
> 1 file changed, 4 insertions(+)
No new properties in common .txt files. Schemas only.
However, this looks like kernel policy which doesn't belong in DT.
> diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt
> index 26ba9e530f13..ca1b4813c5bf 100644
> --- a/Documentation/devicetree/bindings/iommu/iommu.txt
> +++ b/Documentation/devicetree/bindings/iommu/iommu.txt
> @@ -88,6 +88,10 @@ prevent any driver from properly setting up the translations.
>
> Optional properties:
> --------------------
> +- iova-best-fit: When present, the best-fit algorithm will be used, instead
> + of first-fit. This reduces memory fragmentation when allocating IOVAs in
> + some cases, but may also increase the time it takes to allocate an IOVA.
> +
> - pasid-num-bits: Some masters support multiple address spaces for DMA, by
> tagging DMA transactions with an address space identifier. By default,
> this is 0, which means that the device only has one address space.
>