[Intel-gfx] [PATCH] drm/i915: Prevent negative relocation deltas from wrapping
Daniel Vetter
daniel at ffwll.ch
Thu May 15 14:46:32 CEST 2014
On Thu, May 15, 2014 at 01:17:12PM +0100, Chris Wilson wrote:
> This is pure evil. Userspace, I'm looking at you SNA, repacks batch
> buffers on the fly after generation as they are being passed to the
> kernel for execution. These batches also contain self-referenced
> relocations as a single buffer encompasses the state commands, kernels,
> vertices and sampler. During generation the buffers are placed at known
> offsets within the full batch, and then the relocation deltas (as passed
> to the kernel) are tweaked as the batch is repacked into a smaller buffer.
> This means that userspace is passing negative relocations deltas, which
> subsequently wrap to large values if the batch is at a low address. The
> GPU hangs when it then tries to use the large value as a base for its
> address offsets, rather than wrapping back to the real value (as one
> would hope). As the GPU uses positive offsets from the base, we can
> treat the relocation address as the minimum address read by the GPU.
> For the upper bound, we trust that userspace will not read beyond the
> end of the buffer.
>
> This fixes a GPU hang when it tries to use an address (relocation +
> offset) greater than the GTT size. The issue would occur quite easily
> with full-ppgtt as each fd gets its own VM space, so low offsets would
> often be handed out. However, with the rearrangement of the low GTT due
> to capturing the BIOS framebuffer, it is already affecting kernels 3.14
> onwards. I think only IVB+ is susceptible to this bug, but the workaround
> should only kick in rarely, so it seems sensible to always apply it.
>
> v2: Apply the workaround for LUT-based execbuffers as well and only for
> IVB+ as my SNB machine seems to be unaffected.
>
> Testcase: igt/gem_bad_reloc
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: stable at vger.kernel.org
Do we need to fix this in the kernel? Userspace supplying relocs that
kinda don't work smells fishy ...
-Daniel
> ---
> drivers/gpu/drm/i915/i915_gem.c | 15 ++++-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 91 ++++++++++++++++++++++++++++--
> 2 files changed, 97 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1a271ee8dc22..ae751b73a087 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3626,8 +3626,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> u32 size, fence_size, fence_alignment, unfenced_alignment;
> - size_t gtt_max =
> + unsigned long gtt_max =
> flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
> + unsigned long start = alignment;
> struct i915_vma *vma;
> int ret;
>
> @@ -3649,6 +3650,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
> return ERR_PTR(-EINVAL);
> }
> + if (alignment >= gtt_max) {
> + DRM_DEBUG("Alignment larger than the aperture: alignment=%d >= %s aperture=%lu\n",
> + alignment,
> + flags & PIN_MAPPABLE ? "mappable" : "total",
> + gtt_max);
> + return ERR_PTR(-EINVAL);
> + }
>
> size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
>
> @@ -3656,7 +3664,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> * before evicting everything in a vain attempt to find space.
> */
> if (obj->base.size > gtt_max) {
> - DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
> + DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
> obj->base.size,
> flags & PIN_MAPPABLE ? "mappable" : "total",
> gtt_max);
> @@ -3692,7 +3700,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> search_free:
> ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> size, alignment,
> - obj->cache_level, 0, gtt_max,
> + obj->cache_level,
> + start, gtt_max,
> DRM_MM_SEARCH_DEFAULT,
> DRM_MM_CREATE_DEFAULT);
> if (ret) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e94aa365ae40..d37b54862d37 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -166,14 +166,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
> list_del_init(&obj->obj_exec_link);
>
> vma->exec_entry = &exec[i];
> - if (eb->and < 0) {
> + vma->exec_handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
> + if (eb->and < 0)
> eb->lut[i] = vma;
> - } else {
> - uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
> - vma->exec_handle = handle;
> + else
> hlist_add_head(&vma->exec_node,
> - &eb->buckets[handle & eb->and]);
> - }
> + &eb->buckets[vma->exec_handle & eb->and]);
> ++i;
> }
>
> @@ -261,6 +259,19 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> obj->cache_level != I915_CACHE_NONE);
> }
>
> +static bool invalid_offset(struct drm_device *dev, uint64_t offset)
> +{
> + const int gen = INTEL_INFO(dev)->gen;
> +
> + if (gen < 7)
> + return false;
> +
> + if (gen < 8)
> + offset = lower_32_bits(offset);
> +
> + return offset >= to_i915(dev)->gtt.base.total;
> +}
> +
> static int
> relocate_entry_cpu(struct drm_i915_gem_object *obj,
> struct drm_i915_gem_relocation_entry *reloc,
> @@ -272,6 +283,9 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
> char *vaddr;
> int ret;
>
> + if (invalid_offset(dev, delta))
> + return -EFAULT;
> +
> ret = i915_gem_object_set_to_cpu_domain(obj, true);
> if (ret)
> return ret;
> @@ -309,6 +323,9 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
> void __iomem *reloc_page;
> int ret;
>
> + if (invalid_offset(dev, delta))
> + return -EFAULT;
> +
> ret = i915_gem_object_set_to_gtt_domain(obj, true);
> if (ret)
> return ret;
> @@ -702,6 +719,62 @@ err:
> }
>
> static int
> +i915_gem_execbuffer_relocate_check_slow(struct i915_vma *vma,
> + struct drm_i915_gem_relocation_entry *relocs,
> + int total)
> +{
> + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> + struct drm_i915_gem_object *obj = vma->obj;
> + int32_t min = 0;
> + int i, ret;
> +
> + /* This is pure evil. Userspace, I'm looking at you SNA, repacks
> + * batch buffers on the fly after generation and before
> + * being passed to the kernel for execution. These batches also
> + * contain self-referenced relocations as a single buffer encompasses
> + * the state commands, kernels, vertices and sampler. During
> + * generation the buffers are placed at known offsets within the full
> + * batch, and then the relocation deltas (as passed to the kernel)
> + * are tweaked as the batch is repacked into a smaller buffer.
> + * This means that userspace is passing negative relocations deltas,
> + * which subsequently wrap to large values. The GPU hangs when it
> + * then tries to use the large value as a base for the address offset,
> + * rather than wrapping back to the real value (as one would hope).
> + */
> + for (i = 0; i < total; i++) {
> + struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
> + int32_t sdelta;
> +
> + if (reloc->target_handle != vma->exec_handle)
> + continue;
> +
> + sdelta = reloc->delta;
> + if (sdelta < min)
> + min = sdelta;
> + }
> + if (min < 0) {
> + if (drm_mm_node_allocated(&vma->node) &&
> + invalid_offset(obj->base.dev, vma->node.start + (uint32_t)min)) {
> + ret = i915_vma_unbind(vma);
> + if (ret)
> + return ret;
> + }
> + if (!drm_mm_node_allocated(&vma->node)) {
> + if (entry->alignment == 0)
> + entry->alignment =
> + i915_gem_get_gtt_alignment(obj->base.dev,
> + obj->base.size,
> + obj->tiling_mode,
> + entry->flags & __EXEC_OBJECT_NEEDS_MAP);
> + while (entry->alignment < -min)
> + entry->alignment <<= 1;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int
> i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
> struct drm_i915_gem_execbuffer2 *args,
> struct drm_file *file,
> @@ -792,6 +865,12 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
> if (ret)
> goto err;
>
> + list_for_each_entry(vma, &eb->vmas, exec_list) {
> + ret = i915_gem_execbuffer_relocate_check_slow(vma, reloc, total);
> + if (ret)
> + goto err;
> + }
> +
> need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
> ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
> if (ret)
> --
> 2.0.0.rc2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx
mailing list