[PATCH 5/6] drm/etnaviv: add cmdbuf suballocator

Wed Jan 18 11:25:21 UTC 2017

There are 3 big benefits to suballocating a single big DMA buffer
for command submission:

1. Avoid hammering CMA. The old way of allocating and freeing a DMA
   buffer for each submission was hitting some of the real slow
   pathes in CMA, as this allocator was not designed for a concurrent
   small buffers load.

2. Less TLB flushes on IOMMUv2. If a new command buffer is mapped into
   the GPU address space the MMU TLBs need to be flushed. By having
   one big buffer statically mapped to the GPU, a lot of those flushes
   can be avoided.

3. No funky workarounds for GC3000. The FE TLB flush on GC3000 isn't
   reliable. To work around that we tried to lay out the cmdbufs in
   the GPU address space in a way to avoid this issue. This hasn't
   always worked if the address space is crowded. A single statically
   mapped buffer avoids the erratum completely.

Signed-off-by: Lucas Stach <l.stach at pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c     | 123 +++++++++++++++++++++++----
 drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h     |  20 +++--
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |   5 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        |  14 ++-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |   5 +-
 drivers/gpu/drm/etnaviv/etnaviv_mmu.c        |  45 +++++-----
 drivers/gpu/drm/etnaviv/etnaviv_mmu.h        |  10 ++-
 7 files changed, 165 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
index 1ad118c6c64e..633e0f07cbac 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -14,51 +14,140 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <drm/drm_mm.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 
-struct etnaviv_cmdbuf *etnaviv_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
-	size_t nr_bos)
+#define SUBALLOC_SIZE		SZ_256K
+#define SUBALLOC_GRANULE	SZ_4K
+#define SUBALLOC_GRANULES	(SUBALLOC_SIZE / SUBALLOC_GRANULE)
+
+struct etnaviv_cmdbuf_suballoc {
+	/* suballocated dma buffer properties */
+	struct etnaviv_gpu *gpu;
+	void *vaddr;
+	dma_addr_t paddr;
+
+	/* GPU mapping */
+	u32 iova;
+	struct drm_mm_node vram_node; /* only used on MMUv2 */
+
+	/* allocation management */
+	struct mutex lock;
+	DECLARE_BITMAP(granule_map, SUBALLOC_GRANULES);
+	int free_space;
+	wait_queue_head_t free_event;
+};
+
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu)
+{
+	struct etnaviv_cmdbuf_suballoc *suballoc;
+	int ret;
+
+	suballoc = kzalloc(sizeof(*suballoc), GFP_KERNEL);
+	if (!suballoc)
+		return ERR_PTR(-ENOMEM);
+
+	suballoc->gpu = gpu;
+	mutex_init(&suballoc->lock);
+	init_waitqueue_head(&suballoc->free_event);
+
+	suballoc->vaddr = dma_alloc_wc(gpu->dev, SUBALLOC_SIZE,
+				       &suballoc->paddr, GFP_KERNEL);
+	if (!suballoc->vaddr)
+		goto free_suballoc;
+
+	ret = etnaviv_iommu_get_suballoc_va(gpu, suballoc->paddr,
+					    &suballoc->vram_node, SUBALLOC_SIZE,
+					    &suballoc->iova);
+	if (ret)
+		goto free_dma;
+
+	return suballoc;
+
+free_dma:
+	dma_free_wc(gpu->dev, SUBALLOC_SIZE, suballoc->vaddr, suballoc->paddr);
+free_suballoc:
+	kfree(suballoc);
+
+	return NULL;
+}
+
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
+{
+	etnaviv_iommu_put_suballoc_va(suballoc->gpu, &suballoc->vram_node,
+				      SUBALLOC_SIZE, suballoc->iova);
+	dma_free_wc(suballoc->gpu->dev, SUBALLOC_SIZE, suballoc->vaddr,
+		    suballoc->paddr);
+	kfree(suballoc);
+}
+
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+		   size_t nr_bos)
 {
 	struct etnaviv_cmdbuf *cmdbuf;
 	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
 				 sizeof(*cmdbuf));
+	int granule_offs, order, ret;
 
 	cmdbuf = kzalloc(sz, GFP_KERNEL);
 	if (!cmdbuf)
 		return NULL;
 
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V2)
-		size = ALIGN(size, SZ_4K);
+	cmdbuf->suballoc = suballoc;
+	cmdbuf->size = size;
 
-	cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
-				     GFP_KERNEL);
-	if (!cmdbuf->vaddr) {
-		kfree(cmdbuf);
-		return NULL;
+	order = order_base_2(ALIGN(size, SUBALLOC_GRANULE) / SUBALLOC_GRANULE);
+retry:
+	mutex_lock(&suballoc->lock);
+	granule_offs = bitmap_find_free_region(suballoc->granule_map,
+					SUBALLOC_GRANULES, order);
+	if (granule_offs < 0) {
+		suballoc->free_space = 0;
+		mutex_unlock(&suballoc->lock);
+		ret = wait_event_interruptible_timeout(suballoc->free_event,
+						       suballoc->free_space,
+						       msecs_to_jiffies(10 * 1000));
+		if (!ret) {
+			dev_err(suballoc->gpu->dev,
+				"Timeout waiting for cmdbuf space\n");
+			return NULL;
+		}
+		goto retry;
 	}
-
-	cmdbuf->gpu = gpu;
-	cmdbuf->size = size;
+	mutex_unlock(&suballoc->lock);
+	cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE;
+	cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset;
 
 	return cmdbuf;
 }
 
 void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 {
-	etnaviv_iommu_put_cmdbuf_va(cmdbuf->gpu, cmdbuf);
-	dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
-		    cmdbuf->paddr);
+	struct etnaviv_cmdbuf_suballoc *suballoc = cmdbuf->suballoc;
+	int order = order_base_2(ALIGN(cmdbuf->size, SUBALLOC_GRANULE) /
+				 SUBALLOC_GRANULE);
+
+	mutex_lock(&suballoc->lock);
+	bitmap_release_region(suballoc->granule_map,
+			      cmdbuf->suballoc_offset / SUBALLOC_GRANULE,
+			      order);
+	suballoc->free_space = 1;
+	mutex_unlock(&suballoc->lock);
+	wake_up_all(&suballoc->free_event);
 	kfree(cmdbuf);
 }
 
 u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
 {
-	return etnaviv_iommu_get_cmdbuf_va(buf->gpu, buf);
+	return buf->suballoc->iova + buf->suballoc_offset;
 }
 
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf)
 {
-	return buf->paddr;
+	return buf->suballoc->paddr + buf->suballoc_offset;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
index 244358778407..a9a6d4b521fb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -20,18 +20,19 @@
 #include <drm/drm_mm.h>
 #include <linux/types.h>
 
+struct etnaviv_gpu;
+struct etnaviv_cmdbuf_suballoc;
+
 struct etnaviv_cmdbuf {
-	/* device this cmdbuf is allocated for */
-	struct etnaviv_gpu *gpu;
+	/* suballocator this cmdbuf is allocated from */
+	struct etnaviv_cmdbuf_suballoc *suballoc;
 	/* user context key, must be unique between all active users */
 	struct etnaviv_file_private *ctx;
 	/* cmdbuf properties */
+	int suballoc_offset;
 	void *vaddr;
-	dma_addr_t paddr;
 	u32 size;
 	u32 user_size;
-	/* vram node used if the cmdbuf is mapped through the MMUv2 */
-	struct drm_mm_node vram_node;
 	/* fence after which this buffer is to be disposed */
 	struct dma_fence *fence;
 	/* target exec state */
@@ -43,6 +44,15 @@ struct etnaviv_cmdbuf {
 	struct etnaviv_vram_mapping *bo_map[0];
 };
 
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu);
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
+
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+		   size_t nr_bos);
+void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
+
 u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 7e1fefef2f2c..726090d7a6ac 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -333,8 +333,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	bos = drm_malloc_ab(args->nr_bos, sizeof(*bos));
 	relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs));
 	stream = drm_malloc_ab(1, args->stream_size);
-	cmdbuf = etnaviv_cmdbuf_new(gpu, ALIGN(args->stream_size, 8) + 8,
-					args->nr_bos);
+	cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc,
+				    ALIGN(args->stream_size, 8) + 8,
+				    args->nr_bos);
 	if (!bos || !relocs || !stream || !cmdbuf) {
 		ret = -ENOMEM;
 		goto err_submit_cmds;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 3a689c3fbe5b..130d7d517a19 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -694,8 +694,15 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
+	gpu->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(gpu);
+	if (IS_ERR(gpu->cmdbuf_suballoc)) {
+		dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
+		ret = PTR_ERR(gpu->cmdbuf_suballoc);
+		goto fail;
+	}
+
 	/* Create buffer: */
-	gpu->buffer = etnaviv_cmdbuf_new(gpu, PAGE_SIZE, 0);
+	gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0);
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
@@ -1598,6 +1605,11 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 		gpu->buffer = NULL;
 	}
 
+	if (gpu->cmdbuf_suballoc) {
+		etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
+		gpu->cmdbuf_suballoc = NULL;
+	}
+
 	if (gpu->mmu) {
 		etnaviv_iommu_destroy(gpu->mmu);
 		gpu->mmu = NULL;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 9c10ffeff77e..1c0606ea7d5e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -92,6 +92,7 @@ struct etnaviv_event {
 	struct dma_fence *fence;
 };
 
+struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_cmdbuf;
 
 struct etnaviv_gpu {
@@ -135,6 +136,7 @@ struct etnaviv_gpu {
 	int irq;
 
 	struct etnaviv_iommu *mmu;
+	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
 
 	/* Power Control: */
 	struct clk *clk_bus;
@@ -188,9 +190,6 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
-struct etnaviv_cmdbuf *etnaviv_cmdbuf_new(struct etnaviv_gpu *gpu,
-					      u32 size, size_t nr_bos);
-void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 056685bd33b2..dcc86d8eeb98 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -321,55 +321,50 @@ void etnaviv_iommu_restore(struct etnaviv_gpu *gpu)
 		etnaviv_iommuv2_restore(gpu);
 }
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-				struct etnaviv_cmdbuf *buf)
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+				  struct drm_mm_node *vram_node, size_t size,
+				  u32 *iova)
 {
 	struct etnaviv_iommu *mmu = gpu->mmu;
 
 	if (mmu->version == ETNAVIV_IOMMU_V1) {
-		return buf->paddr - gpu->memory_base;
+		*iova = paddr - gpu->memory_base;
+		return 0;
 	} else {
 		int ret;
 
-		if (buf->vram_node.allocated)
-			return (u32)buf->vram_node.start;
-
 		mutex_lock(&mmu->lock);
-		ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node,
-					      buf->size + SZ_64K);
+		ret = etnaviv_iommu_find_iova(mmu, vram_node, size);
 		if (ret < 0) {
 			mutex_unlock(&mmu->lock);
-			return 0;
+			return ret;
 		}
-		ret = iommu_map(mmu->domain, buf->vram_node.start, buf->paddr,
-				buf->size, IOMMU_READ);
+		ret = iommu_map(mmu->domain, vram_node->start, paddr, size,
+				IOMMU_READ);
 		if (ret < 0) {
-			drm_mm_remove_node(&buf->vram_node);
+			drm_mm_remove_node(vram_node);
 			mutex_unlock(&mmu->lock);
-			return 0;
+			return ret;
 		}
-		/*
-		 * At least on GC3000 the FE MMU doesn't properly flush old TLB
-		 * entries. Make sure to space the command buffers out in a way
-		 * that the FE MMU prefetch won't load invalid entries.
-		 */
-		mmu->last_iova = buf->vram_node.start + buf->size + SZ_64K;
+		mmu->last_iova = vram_node->start + size;
 		gpu->mmu->need_flush = true;
 		mutex_unlock(&mmu->lock);
 
-		return (u32)buf->vram_node.start;
+		*iova = (u32)vram_node->start;
+		return 0;
 	}
 }
 
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-				 struct etnaviv_cmdbuf *buf)
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+				   struct drm_mm_node *vram_node, size_t size,
+				   u32 iova)
 {
 	struct etnaviv_iommu *mmu = gpu->mmu;
 
-	if (mmu->version == ETNAVIV_IOMMU_V2 && buf->vram_node.allocated) {
+	if (mmu->version == ETNAVIV_IOMMU_V2) {
 		mutex_lock(&mmu->lock);
-		iommu_unmap(mmu->domain, buf->vram_node.start, buf->size);
-		drm_mm_remove_node(&buf->vram_node);
+		iommu_unmap(mmu->domain,iova, size);
+		drm_mm_remove_node(vram_node);
 		mutex_unlock(&mmu->lock);
 	}
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
index e787e49c9693..54be289e5981 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -62,10 +62,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
 	struct etnaviv_vram_mapping *mapping);
 void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-				struct etnaviv_cmdbuf *buf);
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-				 struct etnaviv_cmdbuf *buf);
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+				  struct drm_mm_node *vram_node, size_t size,
+				  u32 *iova);
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+				   struct drm_mm_node *vram_node, size_t size,
+				   u32 iova);
 
 size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
 void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);
-- 
2.11.0