[Intel-gfx] [PATCH igt v2] igt/perf: Read RCS0 timestamp directly
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Dec 8 15:28:10 UTC 2017
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
On 08/12/17 15:14, Chris Wilson wrote:
> On Haswell, at least, MI_REPORT_PERF_COUNT is not flushed by the
> PIPECONTROL surrounding the batch. (In theory, before the breadcrumb is
> updated the CPU's view of memory is coherent with the GPU, i.e. all
> writes have landed and are visible to userspace. This does not appear to
> be the case for MI_REPORT_PERF_COUNT.) This makes it an unreliable
> method for querying the timestamp, so use MI_STORE_REGISTER_MEM instead.
>
> Testcase: igt/perf/oa-exponents
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> ---
> tests/perf.c | 80 +++++++++++++++++++++++++++++-------------------------------
> 1 file changed, 39 insertions(+), 41 deletions(-)
>
> diff --git a/tests/perf.c b/tests/perf.c
> index 05ec7a472..92e32d93c 100644
> --- a/tests/perf.c
> +++ b/tests/perf.c
> @@ -657,47 +657,46 @@ emit_report_perf_count(struct intel_batchbuffer *batch,
> }
>
> static uint32_t
> -i915_get_one_gpu_timestamp(uint32_t *context_id)
> +i915_get_one_gpu_timestamp(void)
> {
> - drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
> - drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr);
> - drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
> - struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid);
> - int ret;
> + const bool r64b = intel_gen(devid) >= 8;
> + struct drm_i915_gem_execbuffer2 execbuf;
> + struct drm_i915_gem_exec_object2 obj[2];
> + struct drm_i915_gem_relocation_entry reloc;
> + uint32_t batch[16];
> uint32_t timestamp;
> + int i;
>
> - drm_intel_bufmgr_gem_enable_reuse(bufmgr);
> -
> - if (context_id) {
> - ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id);
> - igt_assert_eq(ret, 0);
> - }
> -
> - igt_assert(mi_rpc_ctx);
> - igt_assert(mi_rpc_bo);
> - igt_assert(mi_rpc_batch);
> -
> - ret = drm_intel_bo_map(mi_rpc_bo, true);
> - igt_assert_eq(ret, 0);
> - memset(mi_rpc_bo->virtual, 0x80, 4096);
> - drm_intel_bo_unmap(mi_rpc_bo);
> -
> - emit_report_perf_count(mi_rpc_batch,
> - mi_rpc_bo, /* dst */
> - 0, /* dst offset in bytes */
> - 0xdeadbeef); /* report ID */
> -
> - intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx);
> -
> - ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */);
> - igt_assert_eq(ret, 0);
> - timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1];
> - drm_intel_bo_unmap(mi_rpc_bo);
> + memset(obj, 0, sizeof(obj));
> + obj[0].handle = gem_create(drm_fd, 4096);
> + obj[1].handle = gem_create(drm_fd, 4096);
> + obj[1].relocs_ptr = to_user_pointer(&reloc);
> + obj[1].relocation_count = 1;
>
> - drm_intel_bo_unreference(mi_rpc_bo);
> - intel_batchbuffer_free(mi_rpc_batch);
> - drm_intel_gem_context_destroy(mi_rpc_ctx);
> - drm_intel_bufmgr_destroy(bufmgr);
> + i = 0;
> + batch[i++] = 0x24 << 23 | (1 + r64b); /* SRM */
> + batch[i++] = 0x2358; /* RCS0 timestamp */
> + reloc.target_handle = obj[0].handle;
> + reloc.presumed_offset = obj[0].offset;
> + reloc.offset = i * sizeof(batch[0]);
> + reloc.delta = 0;
> + reloc.read_domains = I915_GEM_DOMAIN_RENDER;
> + reloc.write_domain = I915_GEM_DOMAIN_RENDER;
> + batch[i++] = reloc.delta;
> + if (r64b)
> + batch[i++] = 0;
> + batch[i] = MI_BATCH_BUFFER_END;
> + gem_write(drm_fd, obj[1].handle, 0, batch, sizeof(batch));
> +
> + memset(&execbuf, 0, sizeof(execbuf));
> + execbuf.buffers_ptr = to_user_pointer(obj);
> + execbuf.buffer_count = 2;
> + execbuf.batch_len = 4096;
> + gem_execbuf(drm_fd, &execbuf);
> + gem_close(drm_fd, obj[1].handle);
> +
> + gem_read(drm_fd, obj[0].handle, 0, &timestamp, sizeof(timestamp));
> + gem_close(drm_fd, obj[0].handle);
>
> return timestamp;
> }
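For reference while reading the hunk above: the "SRM" packet is MI_STORE_REGISTER_MEM, which has the command streamer copy an MMIO register (0x2358, the RCS0 RING_TIMESTAMP) into the destination bo at execution time, instead of relying on MI_REPORT_PERF_COUNT being flushed. A minimal sketch of just the batch encoding, pulled out of the execbuf/reloc plumbing; the helper name and the inline MI_BATCH_BUFFER_END define are illustrative only, not part of the patch (r64b follows the patch's intel_gen(devid) >= 8 check, since gen8+ SRM takes a 64-bit address):

#include <stdbool.h>
#include <stdint.h>

#define MI_BATCH_BUFFER_END (0xA << 23)

/* Emit an SRM that copies the RCS0 timestamp register into the
 * destination buffer; the address dword(s) are left as 0 here and are
 * fixed up by the relocation in the patch above.  Returns the number
 * of dwords written. */
static int emit_rcs0_timestamp_srm(uint32_t *batch, bool r64b)
{
	int i = 0;

	batch[i++] = 0x24 << 23 | (1 + r64b);	/* MI_STORE_REGISTER_MEM */
	batch[i++] = 0x2358;			/* RCS0 RING_TIMESTAMP */
	batch[i++] = 0;				/* dst address, low dword (reloc) */
	if (r64b)
		batch[i++] = 0;			/* dst address, high dword (gen8+) */
	batch[i++] = MI_BATCH_BUFFER_END;

	return i;
}

Because the write is performed by the command streamer before the batch completes, a plain gem_read() after gem_execbuf() (as in the hunk above) is enough to retrieve the value: the pread waits for the bo to become coherent with the CPU.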
> @@ -1866,7 +1865,6 @@ test_oa_exponents(void)
> uint32_t n_reports = 0;
> uint32_t n_idle_reports = 0;
> uint32_t n_reads = 0;
> - uint32_t context_id;
> uint64_t first_timestamp = 0;
> bool check_first_timestamp = true;
> struct drm_i915_perf_record_header *header;
> @@ -1895,7 +1893,7 @@ test_oa_exponents(void)
> * first timestamp as way to filter previously
> * scheduled work that would have configured
> * the OA unit at a different period. */
> - first_timestamp = i915_get_one_gpu_timestamp(&context_id);
> + first_timestamp = i915_get_one_gpu_timestamp();
>
> while (n_reads < ARRAY_SIZE(reads) &&
> n_reports < ARRAY_SIZE(reports)) {
> @@ -2021,8 +2019,8 @@ test_oa_exponents(void)
> uint32_t *rpt = NULL, *last = NULL, *last_periodic = NULL;
>
> igt_debug(" > More than 5%% error: avg_ts_delta = %"PRIu64", delta_delta = %"PRIu64", "
> - "expected_delta = %"PRIu64", first_timestamp = %"PRIu64" ctx_id=%"PRIu32"\n",
> - average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp, context_id);
> + "expected_delta = %"PRIu64", first_timestamp = %"PRIu64"\n",
> + average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp);
> for (int i = 0; i < (n_reports - 1); i++) {
> /* XXX: calculating with u32 arithmetic to account for overflow */
> uint32_t u32_delta =