[Intel-gfx] [PATCH igt v2] igt/perf: Read RCS0 timestamp directly

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Dec 8 15:28:10 UTC 2017


Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>

On 08/12/17 15:14, Chris Wilson wrote:
> On Haswell, at least, MI_REPORT_PERF_COUNT is not flushed by the
> PIPE_CONTROL surrounding the batch. (In theory, before the breadcrumb is
> updated the CPU's view of memory is coherent with the GPU, i.e. all
> writes have landed and are visible to userspace. This does not appear to
> be the case for MI_REPORT_PERF_COUNT.) This makes it an unreliable
> method for querying the timestamp, so use MI_STORE_REGISTER_MEM instead.
>
> Testcase: igt/perf/oa-exponents
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> ---
>   tests/perf.c | 80 +++++++++++++++++++++++++++++-------------------------------
>   1 file changed, 39 insertions(+), 41 deletions(-)
>
> diff --git a/tests/perf.c b/tests/perf.c
> index 05ec7a472..92e32d93c 100644
> --- a/tests/perf.c
> +++ b/tests/perf.c
> @@ -657,47 +657,46 @@ emit_report_perf_count(struct intel_batchbuffer *batch,
>   }
>   
>   static uint32_t
> -i915_get_one_gpu_timestamp(uint32_t *context_id)
> +i915_get_one_gpu_timestamp(void)
>   {
> -	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
> -	drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr);
> -	drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
> -	struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid);
> -	int ret;
> +	const bool r64b = intel_gen(devid) >= 8;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	struct drm_i915_gem_exec_object2 obj[2];
> +	struct drm_i915_gem_relocation_entry reloc;
> +	uint32_t batch[16];
>   	uint32_t timestamp;
> +	int i;
>   
> -	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
> -
> -	if (context_id) {
> -		ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id);
> -		igt_assert_eq(ret, 0);
> -	}
> -
> -	igt_assert(mi_rpc_ctx);
> -	igt_assert(mi_rpc_bo);
> -	igt_assert(mi_rpc_batch);
> -
> -	ret = drm_intel_bo_map(mi_rpc_bo, true);
> -	igt_assert_eq(ret, 0);
> -	memset(mi_rpc_bo->virtual, 0x80, 4096);
> -	drm_intel_bo_unmap(mi_rpc_bo);
> -
> -	emit_report_perf_count(mi_rpc_batch,
> -			       mi_rpc_bo, /* dst */
> -			       0, /* dst offset in bytes */
> -			       0xdeadbeef); /* report ID */
> -
> -	intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx);
> -
> -	ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */);
> -	igt_assert_eq(ret, 0);
> -	timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1];
> -	drm_intel_bo_unmap(mi_rpc_bo);
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(drm_fd, 4096);
> +	obj[1].handle = gem_create(drm_fd, 4096);
> +	obj[1].relocs_ptr = to_user_pointer(&reloc);
> +	obj[1].relocation_count = 1;
>   
> -	drm_intel_bo_unreference(mi_rpc_bo);
> -	intel_batchbuffer_free(mi_rpc_batch);
> -	drm_intel_gem_context_destroy(mi_rpc_ctx);
> -	drm_intel_bufmgr_destroy(bufmgr);
> +	i = 0;
> +	batch[i++] = 0x24 << 23 | (1 + r64b); /* SRM */
> +	batch[i++] = 0x2358; /* RCS0 timestamp */
> +	reloc.target_handle = obj[0].handle;
> +	reloc.presumed_offset = obj[0].offset;
> +	reloc.offset = i * sizeof(batch[0]);
> +	reloc.delta = 0;
> +	reloc.read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc.write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = reloc.delta;
> +	if (r64b)
> +		batch[i++] = 0;
> +	batch[i] = MI_BATCH_BUFFER_END;
> +	gem_write(drm_fd, obj[1].handle, 0, batch, sizeof(batch));
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = 2;
> +	execbuf.batch_len = 4096;
> +	gem_execbuf(drm_fd, &execbuf);
> +	gem_close(drm_fd, obj[1].handle);
> +
> +	gem_read(drm_fd, obj[0].handle, 0, &timestamp, sizeof(timestamp));
> +	gem_close(drm_fd, obj[0].handle);
>   
>   	return timestamp;
>   }
> @@ -1866,7 +1865,6 @@ test_oa_exponents(void)
>   			uint32_t n_reports = 0;
>   			uint32_t n_idle_reports = 0;
>   			uint32_t n_reads = 0;
> -			uint32_t context_id;
>   			uint64_t first_timestamp = 0;
>   			bool check_first_timestamp = true;
>   			struct drm_i915_perf_record_header *header;
> @@ -1895,7 +1893,7 @@ test_oa_exponents(void)
>   			 * first timestamp as way to filter previously
>   			 * scheduled work that would have configured
>   			 * the OA unit at a different period. */
> -			first_timestamp = i915_get_one_gpu_timestamp(&context_id);
> +			first_timestamp = i915_get_one_gpu_timestamp();
>   
>   			while (n_reads < ARRAY_SIZE(reads) &&
>   			       n_reports < ARRAY_SIZE(reports)) {
> @@ -2021,8 +2019,8 @@ test_oa_exponents(void)
>   				uint32_t *rpt = NULL, *last = NULL, *last_periodic = NULL;
>   
>   				igt_debug(" > More than 5%% error: avg_ts_delta = %"PRIu64", delta_delta = %"PRIu64", "
> -					  "expected_delta = %"PRIu64", first_timestamp = %"PRIu64" ctx_id=%"PRIu32"\n",
> -					  average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp, context_id);
> +					  "expected_delta = %"PRIu64", first_timestamp = %"PRIu64"\n",
> +					  average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp);
>   				for (int i = 0; i < (n_reports - 1); i++) {
>   					/* XXX: calculating with u32 arithmetic to account for overflow */
>   					uint32_t u32_delta =
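
For readers of the archive, here is a minimal annotated sketch of the
MI_STORE_REGISTER_MEM packet the new helper assembles above. The
emit_timestamp_srm() name and its arguments are illustrative only and are
not part of the patch; the opcode (0x24), the length adjustment and the
0x2358 register offset are taken from the hunk quoted above.

	#include <stdbool.h>
	#include <stdint.h>

	#define RCS_TIMESTAMP 0x2358	/* RING_TIMESTAMP of the render engine */

	/*
	 * Mirrors the dwords the patch writes into batch[]:
	 * MI_STORE_REGISTER_MEM copies a register into memory; on Gen8+
	 * the destination is a 64-bit address, hence the extra dword and
	 * the +1 in the command length field.
	 */
	static int emit_timestamp_srm(uint32_t *cs, bool r64b, uint32_t dst_offset)
	{
		int i = 0;

		cs[i++] = 0x24 << 23 | (1 + r64b);	/* MI_STORE_REGISTER_MEM */
		cs[i++] = RCS_TIMESTAMP;		/* source register */
		cs[i++] = dst_offset;			/* destination, lower 32 bits */
		if (r64b)
			cs[i++] = 0;			/* destination, upper 32 bits */

		return i;				/* dwords emitted */
	}

In the patch itself the destination dword is fixed up by the relocation
entry against obj[0], and once the execbuf has completed the CPU reads the
32-bit timestamp back with gem_read().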