[Intel-gfx] [PATCH] drm/i915: Force CPU relocations if not GTT mapped
Daniel Vetter
daniel at ffwll.ch
Mon Aug 11 12:02:36 CEST 2014
On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need a mappable object during execbuffer
> to the fore and then reuse that decision by propagating the flag through
> to reservation. As a corollary, before doing the actual relocation
> through the GTT, we can make sure that we do have a GTT mapping through
> which to operate.
>
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete, so I've dropped it. I've also added a bit of text to the
commit message to explain why this patch touches the map_and_fenceable
logic.
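
To make the new split easier to follow: the patch computes the "must be
GTT-mappable" decision exactly once per object, in
i915_gem_execbuffer_reserve(), and caches it in the entry flags as
__EXEC_OBJECT_NEEDS_MAP; i915_gem_execbuffer_reserve_vma() and
eb_vma_misplaced() afterwards only test the flag. A rough standalone
sketch of that decision (the structs are illustrative stubs, not the
driver types; only the flag name and the predicate mirror the patch):

#include <stdbool.h>
#include <stdint.h>

#define __EXEC_OBJECT_NEEDS_MAP (1 << 29)

struct entry { uint32_t flags; unsigned int relocation_count; bool wants_fence; };
struct object { bool tiled, has_llc, cpu_write_domain; };

static bool needs_map(const struct entry *e, const struct object *o,
		      bool has_fenced_gpu_access, bool is_ggtt)
{
	/* Fenced GPU access (gen < 4) to a tiled object needs the aperture. */
	bool need_fence = has_fenced_gpu_access && e->wants_fence && o->tiled;

	/* GTT relocations need it too, unless a CPU reloc will do: see the
	 * new need_reloc_mappable() in the patch below (LLC platform, or
	 * the object is already in the CPU write domain). */
	bool need_reloc = e->relocation_count && is_ggtt &&
			  !o->has_llc && !o->cpu_write_domain;

	return need_fence || need_reloc;
}

static void mark_needs_map(struct entry *e, const struct object *o,
			   bool has_fenced_gpu_access, bool is_ggtt)
{
	/* Decided once during reserve; afterwards only the flag is read. */
	if (needs_map(e, o, has_fenced_gpu_access, is_ggtt))
		e->flags |= __EXEC_OBJECT_NEEDS_MAP;
}
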
-Daniel
> ---
>  drivers/gpu/drm/i915/i915_gem.c            |  8 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
>  2 files changed, 42 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	vma->unbind_vma(vma);
>  
>  	list_del_init(&vma->mm_list);
> -	/* Avoid an unnecessary call to unbind on rebind. */
>  	if (i915_is_ggtt(vma->vm))
> -		obj->map_and_fenceable = true;
> +		obj->map_and_fenceable = false;
>  
>  	drm_mm_remove_node(&vma->node);
>  	i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  			return 0;
>  		}
>  	} else if (enable) {
> +		if (WARN_ON(!obj->map_and_fenceable))
> +			return -EINVAL;
> +
>  		reg = i915_find_fence_reg(dev);
>  		if (IS_ERR(reg))
>  			return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
> -	/* Avoid an unnecessary call to unbind on the first bind. */
> -	obj->map_and_fenceable = true;
>  
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
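
The net effect of the three i915_gem.c hunks above is that
map_and_fenceable flips from an optimisation default ("true until proven
otherwise, to skip a needless unbind") to a real invariant: it is only
true while the object holds a mappable, fenceable GGTT binding (the bind
path computes it from the actual placement, unchanged here; this patch
clears it at init and unbind), which is what lets
i915_gem_object_get_fence() WARN on violations. A toy userspace model of
that invariant (names illustrative, not driver code):

#include <stdbool.h>

struct toy_object { bool map_and_fenceable; };

static void toy_init(struct toy_object *o)
{
	o->map_and_fenceable = false;	/* was: true, to skip an unbind */
}

static void toy_bind_ggtt(struct toy_object *o, bool mappable, bool fenceable)
{
	/* Unchanged by the patch: derived from the placement at bind time. */
	o->map_and_fenceable = mappable && fenceable;
}

static void toy_unbind(struct toy_object *o)
{
	o->map_and_fenceable = false;	/* was: true */
}

static int toy_get_fence(const struct toy_object *o)
{
	/* Mirrors the new WARN_ON in i915_gem_object_get_fence(). */
	return o->map_and_fenceable ? 0 : -22 /* -EINVAL */;
}
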
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>  
>  #define __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  #define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>  
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -		i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_engine_cs *ring,
>  				bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence;
>  	uint64_t flags;
>  	int ret;
>  
>  	flags = 0;
> -
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	if (need_fence || need_reloc_mappable(vma))
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>  		flags |= PIN_MAPPABLE;
> -
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>  		flags |= PIN_GLOBAL;
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  }
>  
>  static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	bool need_fence, need_mappable;
>  
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (entry->relocation_count == 0)
> +		return false;
>  
> -	WARN_ON((need_mappable || need_fence) &&
> +	if (!i915_is_ggtt(vma->vm))
> +		return false;
> +
> +	/* See also use_cpu_reloc() */
> +	if (HAS_LLC(vma->obj->base.dev))
> +		return false;
> +
> +	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
> +	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
>  	       !i915_is_ggtt(vma->vm));
>  
>  	if (entry->alignment &&
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> -	if (need_mappable && !obj->map_and_fenceable)
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>  		return true;
>  
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
>  
> -		if (need_mappable)
> +		if (need_mappable) {
> +			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
> -		else
> +		} else
>  			list_move_tail(&vma->exec_list, &ordered_vmas);
>  
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			if (!drm_mm_node_allocated(&vma->node))
>  				continue;
>  
> -			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> +			if (eb_vma_misplaced(vma))
>  				ret = i915_vma_unbind(vma);
>  			else
>  				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but bdw mucks it up again. */
> -	if (flags & I915_DISPATCH_SECURE &&
> -	    !batch_obj->has_global_gtt_mapping) {
> -		/* When we have multiple VMs, we'll need to make sure that we
> -		 * allocate space first */
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -		BUG_ON(!vma);
> -		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -	}
> +	if (flags & I915_DISPATCH_SECURE) {
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +		if (ret)
> +			goto err;
>  
> -	if (flags & I915_DISPATCH_SECURE)
>  		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -	else
> +	} else
>  		exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>  	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> -			args, &eb->vmas, batch_obj, exec_start, flags);
> -	if (ret)
> -		goto err;
> +				args, &eb->vmas, batch_obj, exec_start, flags);
>  
> +	if (flags & I915_DISPATCH_SECURE)
> +		i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>  	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
> --
> 2.1.0.rc1
>
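For reference, since I dropped it: the secure batch hunk above replaces
the raw bind_vma with a pin/unpin bracket around submission, roughly this
shape (condensed from the quoted diff, error handling elided):

	if (flags & I915_DISPATCH_SECURE) {
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
		if (ret)
			goto err;
		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
	} else
		exec_start += i915_gem_obj_offset(batch_obj, vm);

	ret = legacy_ringbuffer_submission(dev, file, ring, ctx, args,
					   &eb->vmas, batch_obj, exec_start,
					   flags);

	if (flags & I915_DISPATCH_SECURE)
		i915_gem_object_ggtt_unpin(batch_obj);

i.e. the pin is held across submission and dropped whether or not the
submission succeeds, while a failure of the pin itself exits via err: with
nothing to undo.
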
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch