[Intel-gfx] [PATCH 171/190] drm/i915: Pass vma to relocate entry
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 11 03:01:12 PST 2016
We can simplify our tracking of pending writes in an execbuf to a
single bit (EXEC_OBJECT_WRITE) in vma->exec_entry->flags, but that
requires the relocation function to know the object's vma. Pass it along.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |   3 +-
 drivers/gpu/drm/i915/i915_gem.c            |  12 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 104 ++++++++++++-----------------
 drivers/gpu/drm/i915/intel_display.c       |   2 +-
 4 files changed, 52 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 29e1d2ed8b05..2ceefce0e731 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2847,7 +2847,8 @@ static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj)
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_request *to);
+ struct drm_i915_gem_request *to,
+ bool write);
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req,
unsigned flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 91f764e9dff2..3eeca1fb89d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2597,9 +2597,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
*/
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_request *to)
+ struct drm_i915_gem_request *to,
+ bool write)
{
- const bool readonly = obj->base.pending_write_domain == 0;
struct drm_i915_gem_request *req[I915_NUM_RINGS];
int ret, i, n;
@@ -2607,13 +2607,13 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
return 0;
n = 0;
- if (readonly) {
- if (obj->last_write.request)
- req[n++] = obj->last_write.request;
- } else {
+ if (write) {
for (i = 0; i < I915_NUM_RINGS; i++)
if (obj->last_read[i].request)
req[n++] = obj->last_read[i].request;
+ } else {
+ if (obj->last_write.request)
+ req[n++] = obj->last_write.request;
}
for (i = 0; i < n; i++) {
ret = __i915_gem_object_sync(obj, to, req[i]);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 891c4593b8eb..2868e094f67c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -543,42 +543,25 @@ repeat:
}
static int
-eb_relocate_entry(struct drm_i915_gem_object *obj,
+eb_relocate_entry(struct i915_vma *vma,
struct i915_execbuffer *eb,
struct drm_i915_gem_relocation_entry *reloc)
{
- struct drm_gem_object *target_obj;
- struct drm_i915_gem_object *target_i915_obj;
- struct i915_vma *target_vma;
- uint64_t target_offset;
+ struct i915_vma *target;
+ u64 target_offset;
int ret;
/* we've already hold a reference to all valid objects */
- target_vma = eb_get_vma(eb, reloc->target_handle);
- if (unlikely(target_vma == NULL))
+ target = eb_get_vma(eb, reloc->target_handle);
+ if (unlikely(target == NULL))
return -ENOENT;
- target_i915_obj = target_vma->obj;
- target_obj = &target_vma->obj->base;
-
- target_offset = gen8_canonical_addr(target_vma->node.start);
-
- /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
- * pipe_control writes because the gpu doesn't properly redirect them
- * through the ppgtt for non_secure batchbuffers. */
- if (unlikely(IS_GEN6(eb->i915) &&
- reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
- ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
- PIN_GLOBAL);
- if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
- return ret;
- }
/* Validate that the target is in a valid r/w GPU domain */
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
DRM_DEBUG("reloc with multiple write domains: "
- "obj %p target %d offset %d "
+ "target %d offset %d "
"read %08x write %08x",
- obj, reloc->target_handle,
+ reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
reloc->write_domain);
@@ -587,47 +570,59 @@ eb_relocate_entry(struct drm_i915_gem_object *obj,
if (unlikely((reloc->write_domain | reloc->read_domains)
& ~I915_GEM_GPU_DOMAINS)) {
DRM_DEBUG("reloc with read/write non-GPU domains: "
- "obj %p target %d offset %d "
+ "target %d offset %d "
"read %08x write %08x",
- obj, reloc->target_handle,
+ reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
reloc->write_domain);
return -EINVAL;
}
- target_obj->pending_read_domains |= reloc->read_domains;
- target_obj->pending_write_domain |= reloc->write_domain;
+ if (reloc->write_domain)
+ target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+
+ /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
+ * pipe_control writes because the gpu doesn't properly redirect them
+ * through the ppgtt for non_secure batchbuffers. */
+ if (unlikely(IS_GEN6(eb->i915) &&
+ reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
+ ret = i915_vma_bind(target, target->obj->cache_level,
+ PIN_GLOBAL);
+ if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
+ return ret;
+ }
/* If the relocation already has the right value in it, no
* more work needs to be done.
*/
+ target_offset = gen8_canonical_addr(target->node.start);
if (target_offset == reloc->presumed_offset)
return 0;
/* Check that the relocation address is valid... */
if (unlikely(reloc->offset >
- obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
+ vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
DRM_DEBUG("Relocation beyond object bounds: "
- "obj %p target %d offset %d size %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset,
- (int) obj->base.size);
+ "target %d offset %d size %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset,
+ (int)vma->size);
return -EINVAL;
}
if (unlikely(reloc->offset & 3)) {
DRM_DEBUG("Relocation not 4-byte aligned: "
- "obj %p target %d offset %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset);
+ "target %d offset %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset);
return -EINVAL;
}
/* We can't wait for rendering with pagefaults disabled */
- if (i915_gem_object_is_active(obj) && pagefault_disabled())
+ if (i915_gem_object_is_active(vma->obj) && pagefault_disabled())
return -EFAULT;
- ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset);
+ ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
if (ret)
return ret;
@@ -662,7 +657,7 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb)
do {
u64 offset = r->presumed_offset;
- ret = eb_relocate_entry(vma->obj, eb, r);
+ ret = eb_relocate_entry(vma, eb, r);
if (ret)
goto out;
@@ -694,7 +689,7 @@ eb_relocate_vma_slow(struct i915_vma *vma,
int i, ret = 0;
for (i = 0; i < entry->relocation_count; i++) {
- ret = eb_relocate_entry(vma->obj, eb, &relocs[i]);
+ ret = eb_relocate_entry(vma, eb, &relocs[i]);
if (ret)
break;
}
@@ -736,7 +731,6 @@ eb_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
bool *need_reloc)
{
- struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
uint64_t flags;
int ret;
@@ -790,11 +784,6 @@ eb_reserve_vma(struct i915_vma *vma,
*need_reloc = true;
}
- if (entry->flags & EXEC_OBJECT_WRITE) {
- obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
- obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
- }
-
return 0;
}
@@ -855,7 +844,6 @@ eb_vma_misplaced(struct i915_vma *vma)
static int eb_reserve(struct i915_execbuffer *eb)
{
const bool has_fenced_gpu_access = INTEL_INFO(eb->i915)->gen < 4;
- struct drm_i915_gem_object *obj;
struct i915_vma *vma;
struct list_head ordered_vmas;
struct list_head pinned_vmas;
@@ -868,7 +856,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
bool need_fence, need_mappable;
vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
- obj = vma->obj;
entry = vma->exec_entry;
if (eb->ctx->flags & CONTEXT_NO_ZEROMAP)
@@ -888,9 +875,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
list_move(&vma->exec_list, &ordered_vmas);
} else
list_move_tail(&vma->exec_list, &ordered_vmas);
-
- obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
- obj->base.pending_write_domain = 0;
}
list_splice(&ordered_vmas, &eb->vmas);
list_splice(&pinned_vmas, &eb->vmas);
@@ -1085,7 +1069,9 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
struct drm_i915_gem_object *obj = vma->obj;
if (obj->flags & other_rings) {
- ret = i915_gem_object_sync(obj, eb->request);
+ ret = i915_gem_object_sync(obj,
+ eb->request,
+ vma->exec_entry->flags & EXEC_OBJECT_WRITE);
if (ret)
return ret;
}
@@ -1248,12 +1234,10 @@ eb_move_to_active(struct i915_execbuffer *eb)
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
- obj->base.write_domain = obj->base.pending_write_domain;
- if (obj->base.write_domain)
- vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
- else
- obj->base.pending_read_domains |= obj->base.read_domains;
- obj->base.read_domains = obj->base.pending_read_domains;
+ obj->base.write_domain = 0;
+ if (vma->exec_entry->flags & EXEC_OBJECT_WRITE)
+ obj->base.read_domains = 0;
+ obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags);
trace_i915_gem_object_change_domain(obj, old_read, old_write);
@@ -1598,7 +1582,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
/* Set the pending read domains for the batch buffer to COMMAND */
- if (eb.batch_vma->obj->base.pending_write_domain) {
+ if (eb.batch_vma->exec_entry->flags & EXEC_OBJECT_WRITE) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
@@ -1630,8 +1614,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
- eb.batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
-
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 13d283e4b0a3..e518d3300a3e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11670,7 +11670,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
goto cleanup_pending;
}
- ret = i915_gem_object_sync(obj, request);
+ ret = i915_gem_object_sync(obj, request, false);
if (ret)
goto cleanup_request;
}
--
2.7.0.rc3