[igt-dev] [PATCH i-g-t v3 12/13] tests/perf: remove libdrm dependency for rendercopy
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Jul 24 11:48:10 UTC 2020
On 24/07/2020 14:37, Zbigniew Kempczyński wrote:
> On Fri, Jul 24, 2020 at 02:10:51PM +0300, Lionel Landwerlin wrote:
>> On 24/07/2020 12:56, Zbigniew Kempczyński wrote:
>>> Rendercopy now uses the no-drm version, so all users have to
>>> migrate to the new interface.
>>>
>>> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
>>> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>>> Cc: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
>>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> Thanks a bunch for this.
>>
>> I have only a single question below, otherwise it looks great.
>>
>> -Lionel
>>
>>
>>> ---
>>> tests/i915/perf.c | 663 ++++++++++++++++++++--------------------------
>>> 1 file changed, 281 insertions(+), 382 deletions(-)
>>>
>>> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
>>> index 92edc9f1..fd2b4073 100644
>>> --- a/tests/i915/perf.c
>>> +++ b/tests/i915/perf.c
>>> @@ -497,64 +497,42 @@ oa_report_get_ctx_id(uint32_t *report)
>>> }
>>> static void
>>> -scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color)
>>> +scratch_buf_memset(struct intel_buf *buf, int width, int height, uint32_t color)
>>> {
>>> - int ret;
>>> -
>>> - ret = drm_intel_bo_map(bo, true /* writable */);
>>> - igt_assert_eq(ret, 0);
>>> + intel_buf_cpu_map(buf, true);
>>> for (int i = 0; i < width * height; i++)
>>> - ((uint32_t *)bo->virtual)[i] = color;
>>> + buf->ptr[i] = color;
>>> - drm_intel_bo_unmap(bo);
>>> + intel_buf_unmap(buf);
>>> }
>>> static void
>>> -scratch_buf_init(drm_intel_bufmgr *bufmgr,
>>> - struct igt_buf *buf,
>>> +scratch_buf_init(struct buf_ops *bops,
>>> + struct intel_buf *buf,
>>> int width, int height,
>>> uint32_t color)
>>> {
>>> - size_t stride = width * 4;
>>> - size_t size = stride * height;
>>> - drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
>>> -
>>> - scratch_buf_memset(bo, width, height, color);
>>> -
>>> - memset(buf, 0, sizeof(*buf));
>>> -
>>> - buf->bo = bo;
>>> - buf->surface[0].stride = stride;
>>> - buf->tiling = I915_TILING_NONE;
>>> - buf->surface[0].size = size;
>>> - buf->bpp = 32;
>>> + intel_buf_init(bops, buf, width, height, 32, 0,
>>> + I915_TILING_NONE, I915_COMPRESSION_NONE);
>>> + scratch_buf_memset(buf, width, height, color);
>>> }
>>> static void
>>> -emit_report_perf_count(struct intel_batchbuffer *batch,
>>> - drm_intel_bo *dst_bo,
>>> +emit_report_perf_count(struct intel_bb *ibb,
>>> + struct intel_buf *dst,
>>> int dst_offset,
>>> uint32_t report_id)
>>> {
>>> - if (IS_HASWELL(devid)) {
>>> - BEGIN_BATCH(3, 1);
>>> - OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
>>> - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
>>> - dst_offset);
>>> - OUT_BATCH(report_id);
>>> - ADVANCE_BATCH();
>>> - } else {
>>> - /* XXX: NB: n dwords arg is actually magic since it internally
>>> - * automatically accounts for larger addresses on gen >= 8...
>>> - */
>>> - BEGIN_BATCH(3, 1);
>>> - OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT);
>>> - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
>>> - dst_offset);
>>> - OUT_BATCH(report_id);
>>> - ADVANCE_BATCH();
>>> - }
>>> + if (IS_HASWELL(devid))
>>> + intel_bb_out(ibb, GEN6_MI_REPORT_PERF_COUNT);
>>> + else
>>> + intel_bb_out(ibb, GEN8_MI_REPORT_PERF_COUNT);
>>> +
>>> + intel_bb_emit_reloc(ibb, dst->handle,
>>> + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
>>> + dst_offset, dst->addr.offset);
>>> + intel_bb_out(ibb, report_id);
>>> }
>>> static void
>>> @@ -1495,14 +1473,13 @@ enum load {
>>> static struct load_helper {
>>> int devid;
>>> - drm_intel_bufmgr *bufmgr;
>>> - drm_intel_context *context;
>>> + struct buf_ops *bops;
>>> uint32_t context_id;
>>> - struct intel_batchbuffer *batch;
>>> + struct intel_bb *ibb;
>>> enum load load;
>>> bool exit;
>>> struct igt_helper_process igt_proc;
>>> - struct igt_buf src, dst;
>>> + struct intel_buf src, dst;
>>> } lh = { 0, };
>>> static void load_helper_signal_handler(int sig)
>>> @@ -1524,6 +1501,14 @@ static void load_helper_set_load(enum load load)
>>> kill(lh.igt_proc.pid, SIGUSR2);
>>> }
>>> +static void set_to_gtt_domain(struct intel_buf *buf, int writing)
>>> +{
>>> + int i915 = buf_ops_get_fd(buf->bops);
>>> +
>>> + gem_set_domain(i915, buf->handle, I915_GEM_DOMAIN_GTT,
>>> + writing ? I915_GEM_DOMAIN_GTT : 0);
>>> +}
>>
>> Is this what we actually want?
>>
>> Why not use a fence on the intel_bb_sync() ?
> I didn't want to introduce any logic changes, so I did exactly
> the same thing drm_intel_bo_wait_rendering() does (setting to the GTT domain).
> If your intention is just to wait for completion I can just do
> intel_bb_sync(ibb) to wait on a fence.
>
> --
> Zbigniew
Yeah, the intention was just to not clog the GPU with loads of blits.
Thanks,
-Lionel
More information about the igt-dev
mailing list