[Intel-gfx] [PATCH 35/37] drm/i915: Live testing for context execution
Joonas Lahtinen
joonas.lahtinen at linux.intel.com
Fri Jan 13 14:28:58 UTC 2017
On ke, 2017-01-11 at 21:09 +0000, Chris Wilson wrote:
> Check we can create and execution within a context.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
<SNIP>
> +static struct i915_vma *
> +gpu_fill_pages(struct i915_vma *vma,
> + unsigned long first_page,
> + unsigned int offset_in_page,
> + unsigned long count,
> + u32 value)
> +{
> + struct drm_i915_gem_object *obj;
> + const int gen = INTEL_GEN(vma->vm->i915);
> + unsigned long sz = (4*count + 1)*sizeof(u32);
> + u64 offset;
> + u32 *cmd;
> + int err;
> +
> + GEM_BUG_ON(offset_in_page >= PAGE_SIZE);
offset_in_page() is also the name of an existing kernel function — consider renaming the parameter to avoid shadowing it.
<SNIP>
For future synchronization purposes, maybe document where the below was
cloned from?
> + offset = PAGE_SIZE * first_page + offset_in_page;
> + offset += vma->node.start;
> + for (sz = 0; sz < count; sz++) {
> + if (gen >= 8) {
> + *cmd++ = MI_STORE_DWORD_IMM_GEN4;
> + *cmd++ = lower_32_bits(offset);
> + *cmd++ = upper_32_bits(offset);
> + *cmd++ = value;
> + } else if (gen >= 6) {
> + *cmd++ = MI_STORE_DWORD_IMM_GEN4;
> + *cmd++ = 0;
> + *cmd++ = offset;
Add a GEM_BUG_ON for overflows and similar conditions — in the following branches too. And
maybe be explicit with "= lower_32_bits(offset);"?
> + *cmd++ = value;
> + } else if (gen >= 4) {
> + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
> + *cmd++ = 0;
> + *cmd++ = offset;
> + *cmd++ = value;
> + } else {
> + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22;
> + *cmd++ = offset;
> + *cmd++ = value;
> + }
> + offset += PAGE_SIZE;
> + }
<SNIP>
>
> +static int gpu_fill(struct drm_i915_gem_object *obj,
> + struct i915_gem_context *ctx)
> +{
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + const unsigned long npages = obj->base.size >> PAGE_SHIFT;
> + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
vm = &(ctx->ppgtt ?: &i915->ggtt)->base? Or does GCC bork up.
Long line anyway.
> + struct intel_engine_cs *engine = i915->engine[RCS];
rcs_fill as function name (rcs_fill_pages too)?
> + err = i915_gem_object_set_to_gtt_domain(obj, false);
Isn't the object most definitely going to be written by GPU?
> +
> + err = i915_vma_pin(vma, 0, 0, PIN_USER);
> + if (err)
> + return err;
> +
> + GEM_BUG_ON(!IS_ALIGNED(npages, 1024));
OK, time for a #define — a bare 1024 is very magic.
> + for (page = 0; page < npages; page += 1024) {
> + unsigned int v = page / 1024;
> + struct drm_i915_gem_request *rq;
> + struct i915_vma *batch;
> +
> + batch = gpu_fill_pages(vma, page, v*sizeof(u32), 1024, v);
> + if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_unpin;
> + i915_vma_unpin(vma);
> + return PTR_ERR(batch);
> + }
> +
> + rq = i915_gem_request_alloc(engine, ctx);
> + if (IS_ERR(rq)) {
> + i915_vma_unpin(batch);
> + i915_vma_unpin(vma);
> + return PTR_ERR(rq);
goto out_unpin;
> + }
> +
> + i915_switch_context(rq);
GEM_BUG_ON(rq->engine != engine) to help readability.
This all makes me think how strange our internal API actually is.
> +
> + ww_mutex_lock(&obj->resv->lock, NULL);
> + reservation_object_add_excl_fence(obj->resv, &rq->fence);
Wasn't there a patch not to mess with the reservation internals (aka,
wrap it?)
> + ww_mutex_unlock(&obj->resv->lock);
> +
> + __i915_add_request(rq, true);
> + }
I imagine this work submission helper might come in handy as a separate
thing?
> +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
> +{
> + const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
I'm not sure what the benefit is compared to having 'i915' here and
HAS_LLC(i915) later — except making the cocci script more complex when we
move to i915->has_llc.
> + unsigned int n, m;
> + unsigned int need_flush;
> + int err;
> +
> + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
I wonder why we've not changed to bool.</ponder>
> + if (err)
> > + return err;
> +
> > + for (n = 0; n < 1024; n++) {
> > + u32 *map;
> +
> > + map = kmap_atomic(i915_gem_object_get_page(obj, n));
> > + for (m = 0; m < 1024; m++)
> > + map[m] = value;
> > + if (!has_llc)
> > + drm_clflush_virt_range(map, PAGE_SIZE);
> > + kunmap_atomic(map);
> > + }
> +
> > + i915_gem_obj_finish_shmem_access(obj);
> > + obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
> > + obj->base.write_domain = 0;
> > + return 0;
> +}
> +
> +static int cpu_check(struct drm_i915_gem_object *obj,
> > + unsigned long num)
> +{
> + unsigned int n, m, max = (obj->base.size >> PAGE_SHIFT) / 1024;
Split assignment to different line, also, meaningful variable names
would be great.
> + unsigned int needs_flush;
> + int err;
> +
> + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
> + if (err)
> + return err;
> +
> + for (n = 0; !err && n < 1024; n++) {
> + u32 *map;
> +
> + map = kmap_atomic(i915_gem_object_get_page(obj, n));
Does some test check the kmap works?
> +static int igt_ctx_exec(void *arg)
> +{
<SNIP>
> + mutex_lock(&i915->drm.struct_mutex);
> + while (!time_after(jiffies, end_time)) {
Time budgeted function?
> + vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
> + npages = min(vm->total / 2, 1024ull * 1024 * PAGE_SIZE);
> + npages >>= PAGE_SHIFT + 10;
> + npages <<= PAGE_SHIFT + 10;
What? Comment please.
> + obj = huge_gem_object(i915, 1024 * PAGE_SIZE, npages);
> > + if (IS_ERR(obj))
> > + break;
> +
> > + /* tie the handle to the drm_file for easy reaping */
> > + err = drm_gem_handle_create(file, &obj->base, &ignored);
> > + if (err) {
> > + i915_gem_object_put(obj);
> > + break;
> > + }
> +
> > + err = cpu_fill(obj, 0xdeadbeef);
> > + if (!err)
> > + err = gpu_fill(obj, ctx);
> > + if (err) {
> + pr_err("Failed to fill object, err=%d\n", err);
Might be informative if it was GPU or CPU? The functions themselves are
silent.
> + break;
> + }
> +
> + list_add_tail(&obj->batch_pool_link, &objects);
> + count++;
> + }
> + pr_info("Submitted %d contexts\n", count);
> +
> + count = 0;
> + list_for_each_entry(obj, &objects, batch_pool_link) {
> + if (!err)
> + err = cpu_check(obj, count);
Could "break;" here instead — count is not used after this point, so why keep iterating once err is set?
> + count++;
> + }
Regards, joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
More information about the Intel-gfx
mailing list