[Intel-gfx] [PATCH] drm/i915: Avoid using mappable space for relocation processing through the CPU
Chris Wilson
chris at chris-wilson.co.uk
Sun Dec 4 13:26:41 CET 2011
We try to avoid writing the relocations through the uncached GTT, if the
buffer is currently in the CPU write domain and so will be flushed out to
main memory afterwards anyway. Also on SandyBridge we can safely write
to the pages in cacheable memory, so long as the buffer is LLC mapped.
In either of these caches, we therefore do not need to force the
reallocation of the buffer into the mappable region of the GTT, reducing
the aperture pressure.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniel Vetter <daniel at ffwll.ch>
---
v2: We need to wait on LLC-mapped buffers before performing the relocs.
This is neatly done by making the GTT/CPU paths symmetric.
Should fix i-g-t/tests/gem_reloc_vs_gpu when the test case itself is
debugged ;-)
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem.c | 4 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 36 +++++++++++++++++++--------
3 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bdbd6d8..1cafe32 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1226,6 +1226,8 @@ int __must_check
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
u32 alignment,
struct intel_ring_buffer *pipelined);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 036bc58..7ada9d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,8 +39,6 @@
static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
- bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
uint64_t offset,
uint64_t size);
@@ -3102,7 +3100,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
-static int
+int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
uint32_t old_write_domain, old_read_domains;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c918124..57e5b78 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -265,6 +265,12 @@ eb_destroy(struct eb_objects *eb)
kfree(eb);
}
+static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
+{
+ return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ obj->cache_level != I915_CACHE_NONE);
+}
+
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
@@ -350,11 +356,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
return ret;
}
+ /* We can't wait for rendering with pagefaults disabled */
+ if (obj->active && in_atomic())
+ return -EFAULT;
+
reloc->delta += target_offset;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+ if (use_cpu_reloc(obj)) {
uint32_t page_offset = reloc->offset & ~PAGE_MASK;
char *vaddr;
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ if (ret)
+ return ret;
+
vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
kunmap_atomic(vaddr);
@@ -363,10 +377,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
uint32_t __iomem *reloc_entry;
void __iomem *reloc_page;
- /* We can't wait for rendering with pagefaults disabled */
- if (obj->active && in_atomic())
- return -EFAULT;
-
ret = i915_gem_object_set_to_gtt_domain(obj, 1);
if (ret)
return ret;
@@ -463,6 +473,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
#define __EXEC_OBJECT_HAS_FENCE (1<<31)
static int
+need_reloc_mappable(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+ return entry->relocation_count && !use_cpu_reloc(obj);
+}
+
+static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring)
{
@@ -475,8 +492,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
if (ret)
@@ -532,8 +548,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
if (need_mappable)
list_move(&obj->exec_list, &ordered_objects);
@@ -573,8 +588,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable =
- entry->relocation_count ? true : need_fence;
+ need_mappable = need_fence || need_reloc_mappable(obj);
if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
(need_mappable && !obj->map_and_fenceable))
--
1.7.7.3
More information about the Intel-gfx
mailing list