[Intel-gfx] [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
Chris Wilson
chris at chris-wilson.co.uk
Thu May 15 17:55:25 CEST 2014
With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem.c | 8 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 146 +++++++++++++++--------------
2 files changed, 79 insertions(+), 75 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f8b2c16745f8..844ea6048321 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2807,9 +2807,8 @@ int i915_vma_unbind(struct i915_vma *vma)
i915_gem_gtt_finish_object(obj);
list_del_init(&vma->mm_list);
- /* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm))
- obj->map_and_fenceable = true;
+ obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma);
@@ -3159,6 +3158,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
return 0;
}
} else if (enable) {
+ if (WARN_ON(!obj->map_and_fenceable))
+ return -EINVAL;
+
reg = i915_find_fence_reg(dev);
if (IS_ERR(reg))
return PTR_ERR(reg);
@@ -4170,8 +4172,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED;
- /* Avoid an unnecessary call to unbind on the first bind. */
- obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c7ee1e3013ac..b4d8c31c0919 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
struct eb_vmas {
struct list_head vmas;
@@ -532,14 +533,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_ring_buffer *ring,
bool *need_reloc)
@@ -547,19 +540,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
unsigned flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
@@ -595,6 +581,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
return 0;
}
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+ if (entry->relocation_count == 0)
+ return false;
+
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
+
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct list_head *vmas,
@@ -627,9 +634,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -657,25 +665,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool need_fence, need_mappable;
obj = vma->obj;
if (!drm_mm_node_allocated(&vma->node))
continue;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
-
- WARN_ON((need_mappable || need_fence) &&
- !i915_is_ggtt(vma->vm));
-
- if ((entry->alignment &&
- vma->node.start & (entry->alignment - 1)) ||
- (need_mappable && !obj->map_and_fenceable))
+ if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+ (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -776,9 +773,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
* relocations were valid.
*/
for (j = 0; j < exec[i].relocation_count; j++) {
- if (copy_to_user(&user_relocs[j].presumed_offset,
- &invalid_offset,
- sizeof(invalid_offset))) {
+ if (__copy_to_user(&user_relocs[j].presumed_offset,
+ &invalid_offset,
+ sizeof(invalid_offset))) {
ret = -EFAULT;
mutex_lock(&dev->struct_mutex);
goto err;
@@ -1268,33 +1265,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
if (ret)
- goto err;
+ goto err_batch;
ret = i915_switch_context(ring, ctx);
if (ret)
- goto err;
+ goto err_batch;
if (ring == &dev_priv->ring[RCS] &&
mode != dev_priv->relative_constants_mode) {
ret = intel_ring_begin(ring, 4);
if (ret)
- goto err;
+ goto err_batch;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1308,30 +1300,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(dev, ring);
if (ret)
- goto err;
+ goto err_batch;
}
-
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
ret = i915_emit_box(dev, &cliprects[i],
args->DR1, args->DR4);
if (ret)
- goto err;
+ goto err_batch;
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
} else {
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1339,6 +1330,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
+err_batch:
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
@@ -1420,18 +1414,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
if (!ret) {
+ struct drm_i915_gem_exec_object __user *user_exec_list =
+ to_user_ptr(args->buffers_ptr);
+
/* Copy the new buffer offsets back to the user's exec list. */
- for (i = 0; i < args->buffer_count; i++)
- exec_list[i].offset = exec2_list[i].offset;
- /* ... and back out to userspace */
- ret = copy_to_user(to_user_ptr(args->buffers_ptr),
- exec_list,
- sizeof(*exec_list) * args->buffer_count);
- if (ret) {
- ret = -EFAULT;
- DRM_DEBUG("failed to copy %d exec entries "
- "back to user (%d)\n",
- args->buffer_count, ret);
+ for (i = 0; i < args->buffer_count; i++) {
+ ret = __copy_to_user(&user_exec_list[i].offset,
+ &exec2_list[i].offset,
+ sizeof(user_exec_list[i].offset));
+ if (ret) {
+ ret = -EFAULT;
+ DRM_DEBUG("failed to copy %d exec entries "
+ "back to user (%d)\n",
+ args->buffer_count, ret);
+ break;
+ }
}
}
@@ -1482,14 +1479,21 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
if (!ret) {
/* Copy the new buffer offsets back to the user's exec list. */
- ret = copy_to_user(to_user_ptr(args->buffers_ptr),
- exec2_list,
- sizeof(*exec2_list) * args->buffer_count);
- if (ret) {
- ret = -EFAULT;
- DRM_DEBUG("failed to copy %d exec entries "
- "back to user (%d)\n",
- args->buffer_count, ret);
+ struct drm_i915_gem_exec_object2 *user_exec_list =
+ to_user_ptr(args->buffers_ptr);
+ int i;
+
+ for (i = 0; i < args->buffer_count; i++) {
+ ret = __copy_to_user(&user_exec_list[i].offset,
+ &exec2_list[i].offset,
+ sizeof(user_exec_list[i].offset));
+ if (ret) {
+ ret = -EFAULT;
+ DRM_DEBUG("failed to copy %d exec entries "
+ "back to user\n",
+ args->buffer_count);
+ break;
+ }
}
}
--
2.0.0.rc2
More information about the Intel-gfx
mailing list