[Intel-gfx] [PATCH 35/37] drm/i915: Live testing for context execution

Joonas Lahtinen joonas.lahtinen at linux.intel.com
Fri Jan 13 14:28:58 UTC 2017


On ke, 2017-01-11 at 21:09 +0000, Chris Wilson wrote:
> Check we can create and execute within a context.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

<SNIP>

> +static struct i915_vma *
> +gpu_fill_pages(struct i915_vma *vma,
> +	       unsigned long first_page,
> +	       unsigned int offset_in_page,
> +	       unsigned long count,
> +	       u32 value)
> +{
> +	struct drm_i915_gem_object *obj;
> +	const int gen = INTEL_GEN(vma->vm->i915);
> +	unsigned long sz = (4*count + 1)*sizeof(u32);
> +	u64 offset;
> +	u32 *cmd;
> +	int err;
> +
> +	GEM_BUG_ON(offset_in_page >= PAGE_SIZE);

offset_in_page is a function too...
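
Renaming the parameter would avoid shadowing the helper from
<linux/mm.h>; the name below is just a suggestion:

	gpu_fill_pages(struct i915_vma *vma,
		       unsigned long first_page,
		       unsigned int page_offset, /* don't shadow offset_in_page() */
		       unsigned long count,
		       u32 value)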

<SNIP>

For future synchronization purposes, maybe document where the below was
cloned from?

> +	offset = PAGE_SIZE * first_page + offset_in_page;
> +	offset += vma->node.start;
> +	for (sz = 0; sz < count; sz++) {
> +		if (gen >= 8) {
> +			*cmd++ = MI_STORE_DWORD_IMM_GEN4;
> +			*cmd++ = lower_32_bits(offset);
> +			*cmd++ = upper_32_bits(offset);
> +			*cmd++ = value;
> +		} else if (gen >= 6) {
> +			*cmd++ = MI_STORE_DWORD_IMM_GEN4;
> +			*cmd++ = 0;
> +			*cmd++ = offset;

Add a GEM_BUG_ON for overflows and so on, in the following branches too.
And maybe be explicit with "= lower_32_bits(offset);"?
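
Roughly what I mean for the gen6+ branch (and similarly below):

	GEM_BUG_ON(upper_32_bits(offset));
	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	*cmd++ = 0;
	*cmd++ = lower_32_bits(offset);
	*cmd++ = value;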

> +			*cmd++ = value;
> +		} else if (gen >= 4) {
> +			*cmd++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
> +			*cmd++ = 0;
> +			*cmd++ = offset;
> +			*cmd++ = value;
> +		} else {
> +			*cmd++ = MI_STORE_DWORD_IMM | 1 << 22;
> +			*cmd++ = offset;
> +			*cmd++ = value;
> +		}
> +		offset += PAGE_SIZE;
> +	}

<SNIP>

> 
> +static int gpu_fill(struct drm_i915_gem_object *obj,
> +		    struct i915_gem_context *ctx)
> +{
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	const unsigned long npages = obj->base.size >> PAGE_SHIFT;
> +	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;

vm = &(ctx->ppgtt ?: &i915->ggtt)->base? Or does GCC bork up on the
mismatched pointer types?

Long line anyway.
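
If the ?: shorthand doesn't fly, simply wrapping the existing expression
would at least fix the line length:

	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;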

> +	struct intel_engine_cs *engine = i915->engine[RCS];

rcs_fill as function name (rcs_fill_pages too)?

> +	err = i915_gem_object_set_to_gtt_domain(obj, false);

Isn't the object most definitely going to be written by GPU?
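
i.e. I'd have expected the write flag to be set:

	err = i915_gem_object_set_to_gtt_domain(obj, true);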

> +
> +	err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +	if (err)
> +		return err;
> +
> +	GEM_BUG_ON(!IS_ALIGNED(npages, 1024));

Ok, time for a #define, 1024 is very magic.
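
Something like this, perhaps (name illustrative; if I read the fill
pattern right, the 1024 is both the chunk size in pages and the number
of u32 slots per page, which is why the two have to match):

	#define PAGES_PER_CHUNK 1024 /* must equal PAGE_SIZE / sizeof(u32) */

	GEM_BUG_ON(!IS_ALIGNED(npages, PAGES_PER_CHUNK));
	for (page = 0; page < npages; page += PAGES_PER_CHUNK) {
		unsigned int v = page / PAGES_PER_CHUNK;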

> +	for (page = 0; page < npages; page += 1024) {
> +		unsigned int v = page / 1024;
> +		struct drm_i915_gem_request *rq;
> +		struct i915_vma *batch;
> +
> +		batch = gpu_fill_pages(vma, page, v*sizeof(u32), 1024, v);
> +		if (IS_ERR(batch)) {

err = PTR_ERR(batch);
goto out_unpin;

> +			i915_vma_unpin(vma);
> +			return PTR_ERR(batch);
> +		}
> +
> +		rq = i915_gem_request_alloc(engine, ctx);
> +		if (IS_ERR(rq)) {
> +			i915_vma_unpin(batch);
> +			i915_vma_unpin(vma);
> +			return PTR_ERR(rq);

goto out_unpin;
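
With both error paths funnelled into a single unwind, roughly:

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq)) {
		i915_vma_unpin(batch);
		err = PTR_ERR(rq);
		goto out_unpin;
	}
	...
	err = 0;
out_unpin:
	i915_vma_unpin(vma);
	return err;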

> +		}
> +
> +		i915_switch_context(rq);

GEM_BUG_ON(rq->engine != engine) to help readability.

This all makes me think how strange our internal API actually is.

> +
> +		ww_mutex_lock(&obj->resv->lock, NULL);
> +		reservation_object_add_excl_fence(obj->resv, &rq->fence);

Wasn't there a patch about not messing with the reservation internals
directly (i.e. wrapping them)?
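
Even a local wrapper would keep the selftest away from the lock; this is
purely illustrative, not an existing helper:

	static void obj_add_excl_fence(struct drm_i915_gem_object *obj,
				       struct dma_fence *fence)
	{
		ww_mutex_lock(&obj->resv->lock, NULL);
		reservation_object_add_excl_fence(obj->resv, fence);
		ww_mutex_unlock(&obj->resv->lock);
	}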

> +		ww_mutex_unlock(&obj->resv->lock);
> +
> +		__i915_add_request(rq, true);
> +	}

I imagine this work submission helper might come in handy as a separate
thing?

> +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
> +{
> +	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));

I'm not sure what the benefit is compared to having 'i915' here and
HAS_LLC(i915) later. Except making the cocci script more complex when we
eventually move to i915->has_llc.
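
i.e. what I'd expect:

	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	...
		if (!HAS_LLC(i915))
			drm_clflush_virt_range(map, PAGE_SIZE);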

> +	unsigned int n, m;
> +	unsigned int need_flush;
> +	int err;
> +
> +	err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);

I wonder why we've not changed that to a bool. </ponder>

> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < 1024; n++) {
> +		u32 *map;
> +
> +		map = kmap_atomic(i915_gem_object_get_page(obj, n));
> +		for (m = 0; m < 1024; m++)
> +			map[m] = value;
> +		if (!has_llc)
> +			drm_clflush_virt_range(map, PAGE_SIZE);
> +		kunmap_atomic(map);
> +	}
> +
> +	i915_gem_obj_finish_shmem_access(obj);
> +	obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
> +	obj->base.write_domain = 0;
> +	return 0;
> +}
> +
> +static int cpu_check(struct drm_i915_gem_object *obj,
> +		     unsigned long num)
> +{
> +	unsigned int n, m, max = (obj->base.size >> PAGE_SHIFT) / 1024;

Split the assignment onto its own line; also, meaningful variable names
would be great.
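
For example (names just a guess at the semantics):

	unsigned int page, dword;
	unsigned int max_chunks;

	max_chunks = (obj->base.size >> PAGE_SHIFT) / 1024;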

> +	unsigned int needs_flush;
> +	int err;
> +
> +	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; !err && n < 1024; n++) {
> +		u32 *map;
> +
> +		map = kmap_atomic(i915_gem_object_get_page(obj, n));

Does some test check the kmap works?

> +static int igt_ctx_exec(void *arg)
> +{

<SNIP>

> +	mutex_lock(&i915->drm.struct_mutex);
> +	while (!time_after(jiffies, end_time)) {

Time budgeted function?

> +		vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
> +		npages = min(vm->total / 2, 1024ull * 1024 * PAGE_SIZE);
> +		npages >>= PAGE_SHIFT + 10;
> +		npages <<= PAGE_SHIFT + 10;

What? Comment please.
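
My guess at the intent, which a comment should spell out (and round_down()
would be more obvious than the shift pair; also, despite the name, npages
looks to be in bytes here):

	/*
	 * Use at most half the address space, capped at 1M pages
	 * (4GiB with 4k pages), rounded down to a whole number of
	 * 1024-page chunks so gpu_fill() can split it evenly.
	 */
	npages = min(vm->total / 2, 1024ull * 1024 * PAGE_SIZE);
	npages = round_down(npages, 1024 << PAGE_SHIFT);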

> +		obj = huge_gem_object(i915, 1024 * PAGE_SIZE, npages);
> +		if (IS_ERR(obj))
> +			break;
> +
> +		/* tie the handle to the drm_file for easy reaping */
> +		err = drm_gem_handle_create(file, &obj->base, &ignored);
> +		if (err) {
> +			i915_gem_object_put(obj);
> +			break;
> +		}
> +
> +		err = cpu_fill(obj, 0xdeadbeef);
> +		if (!err)
> +			err = gpu_fill(obj, ctx);
> +		if (err) {
> +			pr_err("Failed to fill object, err=%d\n", err);

Might be informative to say whether it was the CPU or the GPU fill that
failed? The functions themselves are silent.
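
e.g. simply splitting the two calls:

	err = cpu_fill(obj, 0xdeadbeef);
	if (err) {
		pr_err("cpu_fill failed, err=%d\n", err);
		break;
	}

	err = gpu_fill(obj, ctx);
	if (err) {
		pr_err("gpu_fill failed, err=%d\n", err);
		break;
	}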

> +			break;
> +		}
> +
> +		list_add_tail(&obj->batch_pool_link, &objects);
> +		count++;
> +	}
> +	pr_info("Submitted %d contexts\n", count);
> +
> +	count = 0;
> +	list_for_each_entry(obj, &objects, batch_pool_link) {
> +		if (!err)
> +			err = cpu_check(obj, count);

Could just break here; count is not used after this point, so there's no
reason to keep iterating once an error is hit.
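
i.e. something like:

	list_for_each_entry(obj, &objects, batch_pool_link) {
		err = cpu_check(obj, count++);
		if (err)
			break;
	}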

> +		count++;
> +	}

Regards, joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

