[Intel-gfx] [PATCH i-g-t v5 06/11] tests/perf: rework oa-exponent test

Tue Sep 19 10:25:08 UTC 2017

On 18/09/17 20:53, Matthew Auld wrote:
> On 08/31, Lionel Landwerlin wrote:
>> New issues that were discovered while making the tests work on Gen8+ :
>>
>>   - we need to measure timings between periodic reports and discard all
>>     other kind of reports
>>
>>   - it seems periodicity of the reports can be affected outside of RC6
>>     (frequency change), we can detect this by looking at the amount of
>>     clock cycles per timestamp deltas
>>
>> v2: Drop some unused variables (Matthew)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
> <SNIP>
>
>> +
>> +static uint32_t
>> +i915_get_one_gpu_timestamp(uint32_t *context_id)
>> +{
>> +	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
>> +	drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr);
>> +	drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
> alignment=64 ?
>
>> +	struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid);
>> +	int ret;
>> +	uint32_t timestamp;
>> +
>> +	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
>> +
>> +	if (context_id) {
>> +		ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id);
>> +		igt_assert_eq(ret, 0);
>> +	}
>> +
>> +	igt_assert(mi_rpc_ctx);
>> +	igt_assert(mi_rpc_bo);
>> +	igt_assert(mi_rpc_batch);
>> +
>> +	ret = drm_intel_bo_map(mi_rpc_bo, true);
>> +	igt_assert_eq(ret, 0);
>> +	memset(mi_rpc_bo->virtual, 0x80, 4096);
>> +	drm_intel_bo_unmap(mi_rpc_bo);
>> +
>> +	emit_report_perf_count(mi_rpc_batch,
>> +			       mi_rpc_bo, /* dst */
>> +			       0, /* dst offset in bytes */
>> +			       0xdeadbeef); /* report ID */
>> +
>> +	intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx);
>> +
>> +	ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */);
>> +	igt_assert_eq(ret, 0);
>> +	timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1];
>> +	drm_intel_bo_unmap(mi_rpc_bo);
>> +
>> +	drm_intel_bo_unreference(mi_rpc_bo);
>> +	intel_batchbuffer_free(mi_rpc_batch);
>> +	drm_intel_gem_context_destroy(mi_rpc_ctx);
>> +	drm_intel_bufmgr_destroy(bufmgr);
>> +
>> +	return timestamp;
>> +}
>>   
>>   static void
>>   hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1,
>> @@ -1050,7 +1206,6 @@ i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
>>   	return total_len;
>>   }
>>   
>> -
>>   /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
>>    * control parameter dev.i915.perf_stream_paranoid == 0 */
>>   static void
>> @@ -1365,20 +1520,6 @@ open_and_read_2_oa_reports(int format_id,
>>   	__perf_close(stream_fd);
>>   }
>>   
>> -static void
>> -gen8_read_report_clock_ratios(uint32_t *report,
>> -			      uint32_t *slice_freq_mhz,
>> -			      uint32_t *unslice_freq_mhz)
>> -{
>> -	uint32_t unslice_freq = report[0] & 0x1ff;
>> -	uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
>> -	uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
>> -	uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
>> -
>> -	*slice_freq_mhz = (slice_freq * 16666) / 1000;
>> -	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
>> -}
>> -
>>   static void
>>   print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
>>   {
>> @@ -1570,74 +1711,456 @@ test_oa_formats(void)
>>   	}
>>   }
>>   
>> +
>> +enum load {
>> +	LOW,
>> +	HIGH
>> +};
>> +
>> +#define LOAD_HELPER_PAUSE_USEC 500
>> +
>> +static struct load_helper {
>> +	int devid;
>> +	int has_ppgtt;
>> +	drm_intel_bufmgr *bufmgr;
>> +	drm_intel_context *context;
>> +	uint32_t context_id;
>> +	struct intel_batchbuffer *batch;
>> +	enum load load;
>> +	bool exit;
>> +	struct igt_helper_process igt_proc;
>> +	struct igt_buf src, dst;
>> +} lh = { 0, };
> We can drop the initialisation since this is static.
>
>> +
>> +static void load_helper_signal_handler(int sig)
>> +{
>> +	if (sig == SIGUSR2)
>> +		lh.load = lh.load == LOW ? HIGH : LOW;
>> +	else
>> +		lh.exit = true;
>> +}
>> +
>> +static void load_helper_set_load(enum load load)
>> +{
>> +	igt_assert(lh.igt_proc.running);
>> +
>> +	if (lh.load == load)
>> +		return;
>> +
>> +	lh.load = load;
>> +	kill(lh.igt_proc.pid, SIGUSR2);
>> +}
>> +
>> +static void load_helper_run(enum load load)
>> +{
>> +	/*
>> +	 * FIXME fork helpers won't get cleaned up when started from within a
>> +	 * subtest, so handle the case where it sticks around a bit too long.
>> +	 */
>> +	if (lh.igt_proc.running) {
>> +		load_helper_set_load(load);
>> +		return;
>> +	}
>> +
>> +	lh.load = load;
>> +
>> +	igt_fork_helper(&lh.igt_proc) {
>> +		signal(SIGUSR1, load_helper_signal_handler);
>> +		signal(SIGUSR2, load_helper_signal_handler);
>> +
>> +		while (!lh.exit) {
>> +			int ret;
>> +
>> +			render_copy(lh.batch,
>> +				    lh.context,
>> +				    &lh.src, 0, 0, 1920, 1080,
>> +				    &lh.dst, 0, 0);
>> +
>> +			intel_batchbuffer_flush_with_context(lh.batch,
>> +							     lh.context);
>> +
>> +			ret = drm_intel_gem_context_get_id(lh.context,
>> +							   &lh.context_id);
>> +			igt_assert_eq(ret, 0);
>> +
>> +			drm_intel_bo_wait_rendering(lh.dst.bo);
>> +
>> +			/* Lower the load by pausing after every submitted
>> +			 * write. */
>> +			if (lh.load == LOW)
>> +				usleep(LOAD_HELPER_PAUSE_USEC);
>> +		}
>> +	}
>> +}
>> +
>> +static void load_helper_stop(void)
>> +{
>> +	kill(lh.igt_proc.pid, SIGUSR1);
>> +	igt_assert(igt_wait_helper(&lh.igt_proc) == 0);
>> +}
>> +
>> +static void load_helper_init(void)
>> +{
>> +	int ret;
>> +
>> +	lh.devid = intel_get_drm_devid(drm_fd);
>> +	lh.has_ppgtt = gem_uses_ppgtt(drm_fd);
>> +
>> +	/* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs
>> +	 * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but
>> +	 * that's also all we care about for the rps testcase*/
>> +	igt_assert(intel_gen(lh.devid) >= 6);
>> +	lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
>> +	igt_assert(lh.bufmgr);
>> +
>> +	drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr);
>> +
>> +	lh.context = drm_intel_gem_context_create(lh.bufmgr);
>> +	igt_assert(lh.context);
>> +
>> +	lh.context_id = 0xffffffff;
>> +	ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id);
>> +	igt_assert_eq(ret, 0);
>> +	igt_assert_neq(lh.context_id, 0xffffffff);
>> +
>> +	lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid);
>> +	igt_assert(lh.batch);
>> +
>> +	scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0);
>> +	scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0);
>> +}
>> +
>> +static void load_helper_fini(void)
>> +{
>> +	if (lh.igt_proc.running)
>> +		load_helper_stop();
>> +
>> +	if (lh.src.bo)
>> +		drm_intel_bo_unreference(lh.src.bo);
>> +	if (lh.dst.bo)
>> +		drm_intel_bo_unreference(lh.dst.bo);
>> +
>> +	if (lh.batch)
>> +		intel_batchbuffer_free(lh.batch);
>> +
>> +	if (lh.context)
>> +		drm_intel_gem_context_destroy(lh.context);
>> +
>> +	if (lh.bufmgr)
>> +		drm_intel_bufmgr_destroy(lh.bufmgr);
>> +}
>> +
>>   static void
>>   test_oa_exponents(void)
>>   {
>> -	igt_debug("Testing OA timer exponents\n");
>> +	load_helper_init();
>> +	load_helper_run(HIGH);
>>   
>>   	/* It's asking a lot to sample with a 160 nanosecond period and the
>>   	 * test can fail due to buffer overflows if it wasn't possible to
>>   	 * keep up, so we don't start from an exponent of zero...
>>   	 */
>> -	for (int i = 5; i < 20; i++) {
>> -		uint32_t expected_timestamp_delta;
>> -		uint32_t timestamp_delta;
>> -		uint32_t oa_report0[64];
>> -		uint32_t oa_report1[64];
>> +	for (int exponent = 5; exponent < 18; exponent++) {
> Why the change from 20 -> 18?

It basically divides the amount of time we need to run that test by 4 :)

>
> Otherwise looks okay:
> Reviewed-by: Matthew Auld <matthew.auld at intel.com>
>
>