[PATCH 1/3] iommu/iova: Remove size-alignment for large allocations
Chris Wilson
chris at chris-wilson.co.uk
Sat Jan 16 12:21:24 UTC 2021
fi-cml-drallion is configured with highly restricted PCI windows:
<6>[ 1.773230] PCI host bridge to bus 0000:00
<6>[ 1.773276] pci_bus 0000:00: root bus resource [io 0x0000-0x0cf7 window]
<6>[ 1.773335] pci_bus 0000:00: root bus resource [io 0x0d00-0xffff window]
<6>[ 1.773394] pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000bffff window]
<6>[ 1.773457] pci_bus 0000:00: root bus resource [mem 0x000c0000-0x000c3fff window]
<6>[ 1.773520] pci_bus 0000:00: root bus resource [mem 0x000c4000-0x000c7fff window]
<6>[ 1.773583] pci_bus 0000:00: root bus resource [mem 0x000c8000-0x000cbfff window]
<6>[ 1.773645] pci_bus 0000:00: root bus resource [mem 0x000cc000-0x000cffff window]
<6>[ 1.773707] pci_bus 0000:00: root bus resource [mem 0x000d0000-0x000d3fff window]
<6>[ 1.773770] pci_bus 0000:00: root bus resource [mem 0x000d4000-0x000d7fff window]
<6>[ 1.773832] pci_bus 0000:00: root bus resource [mem 0x000d8000-0x000dbfff window]
<6>[ 1.773894] pci_bus 0000:00: root bus resource [mem 0x000dc000-0x000dffff window]
<6>[ 1.773956] pci_bus 0000:00: root bus resource [mem 0x000e0000-0x000e3fff window]
<6>[ 1.774014] pci_bus 0000:00: root bus resource [mem 0x000e4000-0x000e7fff window]
<6>[ 1.774076] pci_bus 0000:00: root bus resource [mem 0x000e8000-0x000ebfff window]
<6>[ 1.774138] pci_bus 0000:00: root bus resource [mem 0x000ec000-0x000effff window]
<6>[ 1.774200] pci_bus 0000:00: root bus resource [mem 0x000f0000-0x000fffff window]
<6>[ 1.774267] pci_bus 0000:00: root bus resource [mem 0x9f800000-0xdfffffff window]
<6>[ 1.774330] pci_bus 0000:00: root bus resource [mem 0x25e800000-0x7fffffffff window]
<6>[ 1.774395] pci_bus 0000:00: root bus resource [mem 0xfc800000-0xfe7fffff window]
<6>[ 1.774458] pci_bus 0000:00: root bus resource [mem 0xfed40000-0xfed47fff window]
<6>[ 1.774521] pci_bus 0000:00: root bus resource [bus 00-ff]
This limits the usable DMA address space to <10G. Combined with the
fragmentation from top-down allocations starting at 4G, an attempt to
allocate a size-aligned (SZ_4G+SZ_4K) request fails, despite there being
a 6G hole. The size-alignment is only required for the rcache, so remove
the restriction for large allocations.
It would also be useful for the limited DMA32 [SAC] allocation requests
to be allocated bottom-up, with the extra-large allocations going
top-down.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2929
---
drivers/iommu/dma-iommu.c | 8 --------
drivers/iommu/iova.c | 31 ++++++++++++++++++++++++++-----
2 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4078358ed66e..34353d8a785d 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -436,14 +436,6 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
shift = iova_shift(iovad);
iova_len = size >> shift;
- /*
- * Freeing non-power-of-two-sized allocations back into the IOVA caches
- * will come back to bite us badly, so we have to waste a bit of space
- * rounding up anything cacheable to make sure that can't happen. The
- * order of the unadjusted size will still match upon freeing.
- */
- if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
- iova_len = roundup_pow_of_two(iova_len);
dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index d20b8b333d30..3a808d67acca 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -426,15 +426,27 @@ unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
unsigned long limit_pfn, bool flush_rcache)
{
- unsigned long iova_pfn;
+ unsigned long iova_pfn, iova_len;
struct iova *new_iova;
+ bool align = false;
- iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
- if (iova_pfn)
- return iova_pfn;
+ /*
+ * Freeing non-power-of-two-sized allocations back into the IOVA caches
+ * will come back to bite us badly, so we have to waste a bit of space
+ * rounding up anything cacheable to make sure that can't happen. The
+ * order of the unadjusted size will still match upon freeing.
+ */
+ if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) {
+ iova_len = roundup_pow_of_two(size);
+ iova_pfn = iova_rcache_get(iovad, iova_len, limit_pfn + 1);
+ if (iova_pfn)
+ return iova_pfn;
+
+ align = true;
+ }
retry:
- new_iova = alloc_iova(iovad, size, limit_pfn, true);
+ new_iova = alloc_iova(iovad, iova_len, limit_pfn, align);
if (!new_iova) {
unsigned int cpu;
@@ -446,6 +458,9 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
free_global_cached_iovas(iovad);
+
+ align = false;
+ iova_len = size;
goto retry;
}
@@ -918,6 +933,12 @@ static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
return false;
+ if (!is_power_of_two(size))
+ return false;
+
+ if (pfn & (size - 1))
+ return false;
+
return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}
--
2.20.1
More information about the Intel-gfx-trybot
mailing list