[Intel-gfx] [PATCH 3/5] drm/i915: Trim the command parser allocations
John Harrison
John.C.Harrison at Intel.com
Fri Feb 13 05:08:59 PST 2015
Hello,
Apparently, I've been volunteered to review these patches despite not
knowing too much about these areas of the driver...
On 14/01/2015 11:20, Chris Wilson wrote:
> Currently, the command parser tries to create a secondary batch exactly
> as large as the original, and vmap both. This is open to abuse by
> userspace using extremely large batch objects, but only executing very
> short batches. This would be the case, for example, if userspace were
> to implement a command submission ringbuffer. However, we only need to
> allocate pages
> for just the contents of the command sequence in the batch - all
> relocations copied to the secondary batch will reference the original
> batch and so there can be no access to the secondary batch outside of
> the explicit execution region.
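Just to check I follow the sizing argument: here is a standalone sketch of
my own (not code from the patch, and assuming 4 KiB pages as the literal
4095 below does) of how many pages the trimmed vmap touches for a
ring-style batch object:

	#include <stdio.h>

	#define PAGE_SHIFT 12

	/* Mirrors the first_page/last_page arithmetic in vmap_batch(). */
	static unsigned int npages_for(unsigned int start, unsigned int len)
	{
		unsigned int first_page = start >> PAGE_SHIFT;
		unsigned int last_page = (start + len + 4095) >> PAGE_SHIFT;

		return last_page - first_page;
	}

	int main(void)
	{
		/* 4 MiB batch object, 256-byte batch at offset 8192 */
		printf("%u page(s) instead of %u\n",
		       npages_for(8192, 256), (4u << 20) >> PAGE_SHIFT);
		return 0; /* prints "1 page(s) instead of 1024" */
	}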
>
> Testcase: igt/gem_exec_big #ivb,byt,hsw
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88308
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_cmd_parser.c | 74 ++++++++++++++----------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 74 ++++++++++++++++--------------
> 2 files changed, 73 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 806e812340d0..9a6da3536ae5 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -818,24 +818,26 @@ static bool valid_reg(const u32 *table, int count, u32 addr)
> return false;
> }
>
> -static u32 *vmap_batch(struct drm_i915_gem_object *obj)
> +static u32 *vmap_batch(struct drm_i915_gem_object *obj,
> + unsigned start, unsigned len)
> {
> int i;
> void *addr = NULL;
> struct sg_page_iter sg_iter;
> + int first_page = start >> PAGE_SHIFT;
> + int last_page = (len + start + 4095) >> PAGE_SHIFT;
> + int npages = last_page - first_page;
> struct page **pages;
>
> - pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
> + pages = drm_malloc_ab(npages, sizeof(*pages));
> if (pages == NULL) {
> DRM_DEBUG_DRIVER("Failed to get space for pages\n");
> goto finish;
> }
>
> i = 0;
> - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
> - pages[i] = sg_page_iter_page(&sg_iter);
> - i++;
> - }
> + for_each_sg_page(obj->pages->sgl, &sg_iter, npages, first_page)
> + pages[i++] = sg_page_iter_page(&sg_iter);
>
> addr = vmap(pages, i, 0, PAGE_KERNEL);
> if (addr == NULL) {
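As I understand it, the mapping returned here starts at the page
containing 'start' rather than at 'start' itself, which is why
copy_batch() below adds offset_in_page(batch_start_offset) before the
clflush and memcpy.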
> @@ -855,61 +857,61 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
> u32 batch_start_offset,
> u32 batch_len)
> {
> - int ret = 0;
> int needs_clflush = 0;
> - u32 *src_base, *dest_base = NULL;
> - u32 *src_addr, *dest_addr;
> - u32 offset = batch_start_offset / sizeof(*dest_addr);
> - u32 end = batch_start_offset + batch_len;
> + void *src_base, *src;
> + void *dst = NULL;
> + int ret;
>
> - if (end > dest_obj->base.size || end > src_obj->base.size)
> + if (batch_len > dest_obj->base.size ||
> + batch_len + batch_start_offset > src_obj->base.size)
> return ERR_PTR(-E2BIG);
>
> ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
> if (ret) {
> - DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
> + DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
> return ERR_PTR(ret);
> }
>
> - src_base = vmap_batch(src_obj);
> + src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
> if (!src_base) {
> DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
> ret = -ENOMEM;
> goto unpin_src;
> }
>
> - src_addr = src_base + offset;
> -
> - if (needs_clflush)
> - drm_clflush_virt_range((char *)src_addr, batch_len);
> + ret = i915_gem_object_get_pages(dest_obj);
> + if (ret) {
> + DRM_DEBUG_DRIVER("CMD: Failed to get pages for shadow batch\n");
> + goto unmap_src;
> + }
> + i915_gem_object_pin_pages(dest_obj);
>
> ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
> if (ret) {
> - DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
> + DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
> goto unmap_src;
> }
>
> - dest_base = vmap_batch(dest_obj);
> - if (!dest_base) {
> + dst = vmap_batch(dest_obj, 0, batch_len);
> + if (!dst) {
> DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
> + i915_gem_object_unpin_pages(dest_obj);
> ret = -ENOMEM;
> goto unmap_src;
> }
>
> - dest_addr = dest_base + offset;
> -
> - if (batch_start_offset != 0)
> - memset((u8 *)dest_base, 0, batch_start_offset);
> + src = src_base + offset_in_page(batch_start_offset);
> + if (needs_clflush)
> + drm_clflush_virt_range(src, batch_len);
>
> - memcpy(dest_addr, src_addr, batch_len);
> - memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end);
> + memcpy(dst, src, batch_len);
>
> unmap_src:
> vunmap(src_base);
> unpin_src:
> i915_gem_object_unpin_pages(src_obj);
>
> - return ret ? ERR_PTR(ret) : dest_base;
> + return ret ? ERR_PTR(ret) : dst;
> }
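For my own notes, the reordered flow in copy_batch() now reads roughly as
follows (my paraphrase, not authoritative):

	/*
	 * 1. prepare the source object for a shmem read (needs_clflush)
	 * 2. vmap only the pages covering [start, start + len) of the source
	 * 3. get and pin the shadow object's backing pages
	 * 4. move the shadow to the CPU write domain
	 * 5. vmap the pages covering the first batch_len bytes of the shadow
	 * 6. clflush the source if required, then one memcpy
	 */

with the head/tail memsets gone because the shadow is now sized to the
batch rather than to the full source object.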
>
> /**
> @@ -1046,34 +1048,26 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
> u32 batch_len,
> bool is_master)
> {
> - int ret = 0;
> u32 *cmd, *batch_base, *batch_end;
> struct drm_i915_cmd_descriptor default_desc = { 0 };
> bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
> -
> - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
> - if (ret) {
> - DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n");
> - return -1;
> - }
> + int ret = 0;
>
> batch_base = copy_batch(shadow_batch_obj, batch_obj,
> batch_start_offset, batch_len);
> if (IS_ERR(batch_base)) {
> DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
> - i915_gem_object_ggtt_unpin(shadow_batch_obj);
> return PTR_ERR(batch_base);
> }
>
> - cmd = batch_base + (batch_start_offset / sizeof(*cmd));
> -
> /*
> * We use the batch length as size because the shadow object is as
> * large or larger and copy_batch() will write MI_NOPs to the extra
> * space. Parsing should be faster in some cases this way.
> */
> - batch_end = cmd + (batch_len / sizeof(*batch_end));
> + batch_end = batch_base + (batch_len / sizeof(*batch_end));
>
> + cmd = batch_base;
> while (cmd < batch_end) {
> const struct drm_i915_cmd_descriptor *desc;
> u32 length;
> @@ -1132,7 +1126,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
> }
>
> vunmap(batch_base);
> - i915_gem_object_ggtt_unpin(shadow_batch_obj);
> + i915_gem_object_unpin_pages(shadow_batch_obj);
>
> return ret;
> }
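One step that took me a moment: since copy_batch() now copies from
batch_start_offset in the source to offset 0 of the shadow, the old
'cmd = batch_base + (batch_start_offset / sizeof(*cmd))' adjustment is no
longer needed, and the parser simply walks from batch_base up to
batch_base + batch_len / sizeof(u32), which matches the hunk above.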
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 06bdf7670584..3fbd5212225f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1136,16 +1136,15 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
> struct drm_i915_gem_object *batch_obj,
> u32 batch_start_offset,
> u32 batch_len,
> - bool is_master,
> - u32 *flags)
> + bool is_master)
> {
> struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
> struct drm_i915_gem_object *shadow_batch_obj;
> - bool need_reloc = false;
> + struct i915_vma *vma;
> int ret;
>
> shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
> - batch_obj->base.size);
> + PAGE_ALIGN(batch_len));
> if (IS_ERR(shadow_batch_obj))
> return shadow_batch_obj;
>
> @@ -1155,40 +1154,30 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
> batch_start_offset,
> batch_len,
> is_master);
> - if (ret) {
> - if (ret == -EACCES)
> - return batch_obj;
> - } else {
> - struct i915_vma *vma;
> + if (ret)
> + goto err;
>
> - memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
> + ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
There is no explicit unpin for this. Does it happen automatically due to
adding the vma to the eb->vmas list?
Also, does it matter that it will be pinned again (and explicitly
unpinned) if the SECURE flag is set?
> + if (ret)
> + goto err;
>
> - vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
> - vma->exec_entry = shadow_exec_entry;
> - vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
> - drm_gem_object_reference(&shadow_batch_obj->base);
> - i915_gem_execbuffer_reserve_vma(vma, ring, &need_reloc);
> - list_add_tail(&vma->exec_list, &eb->vmas);
> + memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
>
> - shadow_batch_obj->base.pending_read_domains =
> - batch_obj->base.pending_read_domains;
> + vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
> + vma->exec_entry = shadow_exec_entry;
> + vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
> + drm_gem_object_reference(&shadow_batch_obj->base);
> + list_add_tail(&vma->exec_list, &eb->vmas);
>
> - /*
> - * Set the DISPATCH_SECURE bit to remove the NON_SECURE
> - * bit from MI_BATCH_BUFFER_START commands issued in the
> - * dispatch_execbuffer implementations. We specifically
> - * don't want that set when the command parser is
> - * enabled.
> - *
> - * FIXME: with aliasing ppgtt, buffers that should only
> - * be in ggtt still end up in the aliasing ppgtt. remove
> - * this check when that is fixed.
> - */
> - if (USES_FULL_PPGTT(dev))
> - *flags |= I915_DISPATCH_SECURE;
> - }
> + shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
> +
> + return shadow_batch_obj;
>
> - return ret ? ERR_PTR(ret) : shadow_batch_obj;
> +err:
> + if (ret == -EACCES) /* unhandled chained batch */
> + return batch_obj;
> + else
> + return ERR_PTR(ret);
> }
>
> int
> @@ -1532,12 +1521,27 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> batch_obj,
> args->batch_start_offset,
> args->batch_len,
> - file->is_master,
> - &flags);
> + file->is_master);
> if (IS_ERR(batch_obj)) {
> ret = PTR_ERR(batch_obj);
> goto err;
> }
> +
> + /*
> + * Set the DISPATCH_SECURE bit to remove the NON_SECURE
> + * bit from MI_BATCH_BUFFER_START commands issued in the
> + * dispatch_execbuffer implementations. We specifically
> + * don't want that set when the command parser is
> + * enabled.
> + *
> + * FIXME: with aliasing ppgtt, buffers that should only
> + * be in ggtt still end up in the aliasing ppgtt. remove
> + * this check when that is fixed.
> + */
> + if (USES_FULL_PPGTT(dev))
> + flags |= I915_DISPATCH_SECURE;
> +
> + exec_start = 0;
> }
>
> batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;