[Intel-gfx] [PATCH 56/70] drm/i915: Cache kmap between relocations

Chris Wilson chris at chris-wilson.co.uk
Tue Apr 7 08:21:20 PDT 2015


When doing relocations, we have to obtain a mapping to the page
containing the target address. This is either a kmap or an iomap,
depending on the GPU and its cache coherency. Neighbouring relocation
entries are typically within the same page, and so we can cache our
kmapping between them and avoid those pesky TLB flushes.

Note that there is some sleight-of-hand in how the slow relocate works,
as the reloc_entry_cache implies pagefaults are disabled (we are inside
a kmap_atomic section). However, the slow relocate code is meant to be
the fallback for when the atomic fast path fails. Fortunately it works,
as we have already performed the copy_from_user for the relocation array
(no more pagefaults there) and the kmap_atomic cache is enabled after we
have waited upon an active buffer (so no more sleeping in atomic).
Magic!

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 145 +++++++++++++++++++----------
 1 file changed, 96 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 16fd922afb72..9afd2dcba43b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -248,9 +248,48 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 		obj->cache_level != I915_CACHE_NONE);
 }
 
+struct reloc_entry_cache {
+	void *vaddr;
+	unsigned page;
+	enum { KMAP, IOMAP } type;
+};
+
+static void reloc_entry_cache_init(struct reloc_entry_cache *cache)
+{
+	cache->page = -1;
+	cache->vaddr = NULL;
+}
+
+static void reloc_entry_cache_fini(struct reloc_entry_cache *cache)
+{
+	if (cache->vaddr == NULL)
+		return;
+
+	switch (cache->type) {
+	case KMAP: kunmap_atomic(cache->vaddr); break;
+	case IOMAP: io_mapping_unmap_atomic(cache->vaddr); break;
+	}
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+			struct reloc_entry_cache *cache,
+			int page)
+{
+	if (cache->page != page) {
+		if (cache->vaddr)
+			kunmap_atomic(cache->vaddr);
+		cache->page = page;
+		cache->vaddr = kmap_atomic(i915_gem_object_get_page(obj, page));
+		cache->type = KMAP;
+	}
+
+	return cache->vaddr;
+}
+
 static int
 relocate_entry_cpu(struct drm_i915_gem_object *obj,
 		   struct drm_i915_gem_relocation_entry *reloc,
+		   struct reloc_entry_cache *cache,
 		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -263,30 +302,41 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-				reloc->offset >> PAGE_SHIFT));
+	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
 	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 
 	if (INTEL_INFO(dev)->gen >= 8) {
-		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
-
-		if (page_offset == 0) {
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+		page_offset += sizeof(uint32_t);
+		if (page_offset == PAGE_SIZE) {
+			vaddr = reloc_kmap(obj, cache, cache->page + 1);
+			page_offset = 0;
 		}
-
 		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
 	}
 
-	kunmap_atomic(vaddr);
-
 	return 0;
 }
 
+static void *reloc_iomap(struct drm_i915_private *i915,
+			 struct reloc_entry_cache *cache,
+			 uint64_t offset)
+{
+	if (cache->page != offset >> PAGE_SHIFT) {
+		if (cache->vaddr)
+			io_mapping_unmap_atomic(cache->vaddr);
+		cache->page = offset >> PAGE_SHIFT;
+		cache->vaddr =
+			io_mapping_map_atomic_wc(i915->gtt.mappable,
+						 offset & PAGE_MASK);
+		cache->type = IOMAP;
+	}
+
+	return cache->vaddr;
+}
 static int
 relocate_entry_gtt(struct drm_i915_gem_object *obj,
 		   struct drm_i915_gem_relocation_entry *reloc,
+		   struct reloc_entry_cache *cache,
 		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -307,26 +357,17 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 	/* Map the page containing the relocation we're going to perform.  */
 	offset = i915_gem_obj_ggtt_offset(obj);
 	offset += reloc->offset;
-	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-					      offset & PAGE_MASK);
+	reloc_page = reloc_iomap(dev_priv, cache, offset);
 	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
 	if (INTEL_INFO(dev)->gen >= 8) {
 		offset += sizeof(uint32_t);
-
-		if (offset_in_page(offset) == 0) {
-			io_mapping_unmap_atomic(reloc_page);
-			reloc_page =
-				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-							 offset);
-		}
-
+		if (offset_in_page(offset) == 0)
+			reloc_page = reloc_iomap(dev_priv, cache, offset);
 		iowrite32(upper_32_bits(delta),
 			  reloc_page + offset_in_page(offset));
 	}
 
-	io_mapping_unmap_atomic(reloc_page);
-
 	return 0;
 }
 
@@ -342,6 +383,7 @@ clflush_write32(void *addr, uint32_t value)
 static int
 relocate_entry_clflush(struct drm_i915_gem_object *obj,
 		       struct drm_i915_gem_relocation_entry *reloc,
+		       struct reloc_entry_cache *cache,
 		       uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -354,31 +396,26 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-				reloc->offset >> PAGE_SHIFT));
+	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
 	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
 
 	if (INTEL_INFO(dev)->gen >= 8) {
-		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
-
-		if (page_offset == 0) {
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+		page_offset += sizeof(uint32_t);
+		if (page_offset == PAGE_SIZE) {
+			vaddr = reloc_kmap(obj, cache, cache->page + 1);
+			page_offset = 0;
 		}
-
 		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
 	}
 
-	kunmap_atomic(vaddr);
-
 	return 0;
 }
 
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
-				   struct drm_i915_gem_relocation_entry *reloc)
+				   struct drm_i915_gem_relocation_entry *reloc,
+				   struct reloc_entry_cache *cache)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_gem_object *target_obj;
@@ -463,11 +500,11 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 
 	if (use_cpu_reloc(obj))
-		ret = relocate_entry_cpu(obj, reloc, target_offset);
+		ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
 	else if (obj->map_and_fenceable)
-		ret = relocate_entry_gtt(obj, reloc, target_offset);
+		ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
 	else if (cpu_has_clflush)
-		ret = relocate_entry_clflush(obj, reloc, target_offset);
+		ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
 	else {
 		WARN_ONCE(1, "Impossible case in relocation handling\n");
 		ret = -ENODEV;
@@ -490,9 +527,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *user_relocs;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	int remain, ret;
+	struct reloc_entry_cache cache;
+	int remain, ret = 0;
 
 	user_relocs = to_user_ptr(entry->relocs_ptr);
+	reloc_entry_cache_init(&cache);
 
 	remain = entry->relocation_count;
 	while (remain) {
@@ -502,21 +541,24 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 			count = ARRAY_SIZE(stack_reloc);
 		remain -= count;
 
-		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
-			return -EFAULT;
+		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
+			ret = -EFAULT;
+			goto out;
+		}
 
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
+			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
 			if (ret)
-				return ret;
+				goto out;
 
 			if (r->presumed_offset != offset &&
 			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
 						    &r->presumed_offset,
 						    sizeof(r->presumed_offset))) {
-				return -EFAULT;
+				ret = -EFAULT;
+				goto out;
 			}
 
 			user_relocs++;
@@ -524,7 +566,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 		} while (--count);
 	}
 
-	return 0;
+out:
+	reloc_entry_cache_fini(&cache);
+	return ret;
 #undef N_RELOC
 }
 
@@ -534,15 +578,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 				      struct drm_i915_gem_relocation_entry *relocs)
 {
 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	int i, ret;
+	struct reloc_entry_cache cache;
+	int i, ret = 0;
 
+	reloc_entry_cache_init(&cache);
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
+		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
 		if (ret)
-			return ret;
+			break;
 	}
+	reloc_entry_cache_fini(&cache);
 
-	return 0;
+	return ret;
 }
 
 static int
-- 
2.1.4



More information about the Intel-gfx mailing list