[Intel-gfx] [PATCH 16/21] drm/i915/selftests: huge page tests
Chris Wilson
chris at chris-wilson.co.uk
Mon Jul 3 14:48:18 UTC 2017
Quoting Matthew Auld (2017-07-03 15:14:58)
> +static struct i915_vma *
> +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 value)
> +{
> + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *batch;
> + unsigned int size;
> + u32 *cmd;
> + int err;
> +
> + GEM_BUG_ON(INTEL_GEN(i915) < 8);
Didn't you enable large pages for snb and earlier?
> +static int gpu_write(struct i915_vma *vma,
> + struct i915_gem_context *ctx,
> + unsigned long offset,
> + u32 value)
> +{
> + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
> + struct drm_i915_gem_request *rq;
> + struct i915_vma *batch;
> + int flags = 0;
> + int err;
> +
> + batch = gpu_write_dw(vma, offset, value);
> + if (IS_ERR(batch))
> + return PTR_ERR(batch);
> +
> + rq = i915_gem_request_alloc(i915->engine[RCS], ctx);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto err_batch;
> + }
> +
> + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> + if (err)
> + goto err_request;
> +
> + err = i915_switch_context(rq);
> + if (err)
> + goto err_request;
> +
> + err = rq->engine->emit_bb_start(rq,
> + batch->node.start, batch->node.size,
> + flags);
> + if (err)
> + goto err_request;
> +
> + i915_vma_move_to_active(batch, rq, 0);
> + i915_gem_object_set_active_reference(batch->obj);
> + i915_vma_unpin(batch);
> + i915_vma_close(batch);
> +
> + i915_vma_move_to_active(vma, rq, 0);
> +
> + reservation_object_lock(vma->obj->resv, NULL);
vma->resv
> + reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
> + reservation_object_unlock(vma->obj->resv);
> +
> + __i915_add_request(rq, true);
> +
> + return 0;
> +
> +err_request:
> + __i915_add_request(rq, false);
> +err_batch:
> + i915_vma_unpin(batch);
Leaking batch. Just tie the lifetime of the batch to the request
immediately after allocating the request.
> +
> + return err;
> +}
> +
> +static inline int igt_can_allocate_thp(struct drm_i915_private *i915)
> +{
> + return HAS_PAGE_SIZE(i915, I915_GTT_PAGE_SIZE_2M) &&
> + has_transparent_hugepage();
> +}
> +
> +static int igt_ppgtt_write_huge(void *arg)
> +{
> + struct i915_hw_ppgtt *ppgtt = arg;
> + struct drm_i915_private *i915 = ppgtt->base.i915;
> + struct drm_i915_gem_object *obj;
> + const unsigned int page_sizes[] = {
> + I915_GTT_PAGE_SIZE_64K,
> + I915_GTT_PAGE_SIZE_2M,
> + };
> + struct i915_vma *vma;
> + int err = 0;
> + int i;
> +
> + if (!igt_can_allocate_thp(i915))
> + return 0;
> +
> + /* Sanity check that the HW uses huge-pages correctly */
> +
> + ppgtt = i915->kernel_context->ppgtt;
> +
> + obj = i915_gem_object_create(i915, I915_GTT_PAGE_SIZE_2M);
> + if (IS_ERR(obj))
> + return PTR_ERR(obj);
2M is MAX_ORDER, so you don't need thp for this and can just use
i915_gem_object_create_internal() (or both, once to test without relying
on thp, perhaps more common, then once to test thp shmemfs).
> + err = i915_gem_object_pin_pages(obj);
> + if (err)
> + goto out_put;
> +
> + GEM_BUG_ON(!obj->mm.page_sizes.sg);
> +
> + if (obj->mm.page_sizes.sg < I915_GTT_PAGE_SIZE_2M) {
> + pr_err("Failed to allocate huge pages\n");
> + err = -EINVAL;
Failure? Or just unable to test? Nothing forces the kernel to give you
contiguous space.
> + goto out_unpin;
> + }
> +
> + vma = i915_vma_instance(obj, &ppgtt->base, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto out_unpin;
> + }
> +
> + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
> + unsigned int page_size = page_sizes[i];
> + u32 *map;
> +
> + if (!HAS_PAGE_SIZE(i915, page_size))
> + continue;
> +
> + /* Force the page size */
> + obj->mm.page_sizes.sg = page_size;
> +
> + err = i915_gem_object_set_to_gtt_domain(obj, true);
> + if (err)
> + goto out_close;
Gets in the way of the important details, make it coherent and get rid
of this.
> + err = i915_vma_pin(vma, 0, 0, PIN_USER);
> + if (err)
> + goto out_close;
Move the unbind above this:
/* Force the page size */
i915_vma_unbind(vma);
	obj->mm.page_sizes.sg = page_size;
i915_vma_pin(vma);
GEM_BUG_ON(vma->page_sizes.sg != page_size);
> +
> + GEM_BUG_ON(obj->mm.page_sizes.gtt);
> + GEM_BUG_ON(vma->page_sizes.sg != page_size);
> + GEM_BUG_ON(!vma->page_sizes.phys);
> +
> + GEM_BUG_ON(vma->page_sizes.gtt != page_size);
> + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I915_GTT_PAGE_SIZE_2M));
> + GEM_BUG_ON(!IS_ALIGNED(vma->node.size, I915_GTT_PAGE_SIZE_2M));
> +
> + err = gpu_write(vma, i915->kernel_context, 0, page_size);
> + if (err) {
> + i915_vma_unpin(vma);
> + goto out_close;
> + }
> +
> + err = i915_gem_object_set_to_cpu_domain(obj, false);
> + if (err) {
> + i915_vma_unpin(vma);
> + goto out_close;
> + }
Again, this is getting in the way of the important details.
> + i915_vma_unpin(vma);
> + err = i915_vma_unbind(vma);
> + if (err)
> + goto out_close;
> +
> + map = i915_gem_object_pin_map(obj, I915_MAP_WB);
> + if (IS_ERR(map)) {
> + err = PTR_ERR(map);
> + goto out_close;
> + }
Do we need to test different PTE bits?
If you don't make this coherent, use
map = pin_map(obj);
set_to_cpu_domain(obj);
for_each_page()
test
set_to_gtt_domain(obj);
unpin_map(obj);
> +
> + GEM_BUG_ON(map[0] != page_size);
This is the essential test being performed. Check it, and report an
error properly. Check every page at different offsets!
> +
> + i915_gem_object_unpin_map(obj);
> + }
> +
> +out_close:
> + i915_vma_close(vma);
> +out_unpin:
> + i915_gem_object_unpin_pages(obj);
> +out_put:
> + i915_gem_object_put(obj);
> +
> + return err;
> +}
More information about the Intel-gfx
mailing list