[Intel-gfx] [PATCH i-g-t 2/9] tests/perf: Test i915 assisted command stream based perf metrics capture

Ewelina Musial ewelina.musial at intel.com
Thu Sep 21 13:50:09 UTC 2017


On Wed, Sep 13, 2017 at 04:22:01PM +0530, Sagar Arun Kamble wrote:
> This tests different performance metrics being streamed by i915 driver.
> This feature in i915, also referred to as Driver Assisted Performance
> Capture (DAPC), gives userspace the ability to sample the OA reports
> at execbuf boundaries and associate other metadata like CTX ID, PID, TAG
> with each sample. Further, the ability to capture engine timestamps and MMIO
> reads is also provided.
> 
> v2: Defining the enums for OA_SOURCE and PERF_PROP locally till the
> libdrm changes are merged.
> 
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
Reviewed-by: Ewelina Musial <ewelina.musial at intel.com>
> ---
>  tests/Makefile.sources  |   1 +
>  tests/intel_perf_dapc.c | 811 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 812 insertions(+)
>  create mode 100644 tests/intel_perf_dapc.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 6c19509..24bd099 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -170,6 +170,7 @@ TESTS_progs = \
>  	gen7_forcewake_mt \
>  	gvt_basic \
>  	intel_perf \
> +	intel_perf_dapc \
>  	kms_3d \
>  	kms_addfb_basic \
>  	kms_atomic \
> diff --git a/tests/intel_perf_dapc.c b/tests/intel_perf_dapc.c
> new file mode 100644
> index 0000000..92b4dee
> --- /dev/null
> +++ b/tests/intel_perf_dapc.c
> @@ -0,0 +1,811 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +#include <fcntl.h>
> +
> +#include "igt.h"
> +#include "drm.h"
> +
> +IGT_TEST_DESCRIPTION("Test the i915 command stream based perf metrics streaming interface");
> +
> +/* Temporarily copy i915-perf uapi here to avoid a dependency on libdrm's
> + * i915_drm.h copy being updated with the i915-perf interface before this
> + * test can land in i-g-t.
> + *
> + * TODO: remove this once the interface lands in libdrm
> + */
> +#ifndef DRM_I915_PERF_OPEN
> +#define DRM_I915_PERF_OPEN		0x36
> +#define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE +  \
> +						DRM_I915_PERF_OPEN, \
> +						struct drm_i915_perf_open_param)
> +
> +enum drm_i915_oa_format {
> +	I915_OA_FORMAT_A13 = 1,     /* HSW only */
> +	I915_OA_FORMAT_A29,         /* HSW only */
> +	I915_OA_FORMAT_A13_B8_C8,   /* HSW only */
> +	I915_OA_FORMAT_B4_C8,       /* HSW only */
> +	I915_OA_FORMAT_A45_B8_C8,   /* HSW only */
> +	I915_OA_FORMAT_B4_C8_A16,   /* HSW only */
> +	I915_OA_FORMAT_C4_B8,       /* HSW+ */
> +
> +	/* Gen8+ */
> +	I915_OA_FORMAT_A12,
> +	I915_OA_FORMAT_A12_B8_C8,
> +	I915_OA_FORMAT_A32u40_A4u32_B8_C8,
> +
> +	I915_OA_FORMAT_MAX /* non-ABI */
> +};
> +
> +enum drm_i915_perf_sample_oa_source {
> +	I915_PERF_SAMPLE_OA_SOURCE_OABUFFER,
> +	I915_PERF_SAMPLE_OA_SOURCE_CS,
> +	I915_PERF_SAMPLE_OA_SOURCE_MAX	/* non-ABI */
> +};
> +
> +#define I915_PERF_MMIO_NUM_MAX	8
> +struct drm_i915_perf_mmio_list {
> +	__u32 num_mmio;
> +	__u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
> +};
> +
> +enum drm_i915_perf_property_id {
> +	DRM_I915_PERF_PROP_CTX_HANDLE = 1,
> +	DRM_I915_PERF_PROP_SAMPLE_OA,
> +	DRM_I915_PERF_PROP_OA_METRICS_SET,
> +	DRM_I915_PERF_PROP_OA_FORMAT,
> +	DRM_I915_PERF_PROP_OA_EXPONENT,
> +	DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE,
> +	DRM_I915_PERF_PROP_ENGINE,
> +	DRM_I915_PERF_PROP_SAMPLE_CTX_ID,
> +	DRM_I915_PERF_PROP_SAMPLE_PID,
> +	DRM_I915_PERF_PROP_SAMPLE_TAG,
> +	DRM_I915_PERF_PROP_SAMPLE_TS,
> +	DRM_I915_PERF_PROP_SAMPLE_MMIO,
> +	DRM_I915_PERF_PROP_MAX /* non-ABI */
> +};
> +
> +struct drm_i915_perf_open_param {
> +	__u32 flags;
> +#define I915_PERF_FLAG_FD_CLOEXEC	(1<<0)
> +#define I915_PERF_FLAG_FD_NONBLOCK	(1<<1)
> +#define I915_PERF_FLAG_DISABLED		(1<<2)
> +
> +	__u32 num_properties;
> +	__u64 properties_ptr;
> +};
> +
> +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
> +#define I915_PERF_IOCTL_DISABLE	_IO('i', 0x1)
> +
> +struct drm_i915_perf_record_header {
> +	__u32 type;
> +	__u16 pad;
> +	__u16 size;
> +};
> +
> +enum drm_i915_perf_record_type {
> +	DRM_I915_PERF_RECORD_SAMPLE = 1,
> +	DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2,
> +	DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
> +	DRM_I915_PERF_RECORD_MAX /* non-ABI */
> +};
> +#endif /* !DRM_I915_PERF_OPEN */
> +
> +/* There is no ifdef we can use for those formats :( */
> +enum {
> +	local_I915_OA_FORMAT_A12 = I915_OA_FORMAT_C4_B8 + 1,
> +	local_I915_OA_FORMAT_A12_B8_C8 = I915_OA_FORMAT_C4_B8 + 2,
> +	local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 = I915_OA_FORMAT_C4_B8 + 3,
> +};
> +
> +#define local_I915_OA_FORMAT_MAX (local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 + 1)
> +
> +enum {
> +	local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER,
> +	local_I915_PERF_SAMPLE_OA_SOURCE_CS,
> +	local_I915_PERF_SAMPLE_OA_SOURCE_MAX	/* non-ABI */
> +};
> +
> +enum {
> +	local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE = DRM_I915_PERF_PROP_OA_EXPONENT + 1,
> +	local_DRM_I915_PERF_PROP_ENGINE = DRM_I915_PERF_PROP_OA_EXPONENT + 2,
> +	local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID = DRM_I915_PERF_PROP_OA_EXPONENT + 3,
> +	local_DRM_I915_PERF_PROP_SAMPLE_PID = DRM_I915_PERF_PROP_OA_EXPONENT + 4,
> +	local_DRM_I915_PERF_PROP_SAMPLE_TAG = DRM_I915_PERF_PROP_OA_EXPONENT + 5,
> +	local_DRM_I915_PERF_PROP_SAMPLE_TS = DRM_I915_PERF_PROP_OA_EXPONENT + 6,
> +	local_DRM_I915_PERF_PROP_SAMPLE_MMIO = DRM_I915_PERF_PROP_OA_EXPONENT + 7,
> +	local_DRM_I915_PERF_PROP_MAX /* non-ABI */
> +};
> +
> +static struct {
> +	const char *name;
> +	size_t size;
> +	int a40_high_off; /* bytes */
> +	int a40_low_off;
> +	int n_a40;
> +	int a_off;
> +	int n_a;
> +	int first_a;
> +	int b_off;
> +	int n_b;
> +	int c_off;
> +	int n_c;
> +	int min_gen;
> +	int max_gen;
> +} oa_formats[local_I915_OA_FORMAT_MAX] = {
> +	[I915_OA_FORMAT_A13] = { /* HSW only */
> +		"A13", .size = 64,
> +		.a_off = 12, .n_a = 13,
> +		.max_gen = 7 },
> +	[I915_OA_FORMAT_A29] = { /* HSW only */
> +		"A29", .size = 128,
> +		.a_off = 12, .n_a = 29,
> +		.max_gen = 7 },
> +	[I915_OA_FORMAT_A13_B8_C8] = { /* HSW only */
> +		"A13_B8_C8", .size = 128,
> +		.a_off = 12, .n_a = 13,
> +		.b_off = 64, .n_b = 8,
> +		.c_off = 96, .n_c = 8,
> +		.max_gen = 7 },
> +	[I915_OA_FORMAT_A45_B8_C8] = { /* HSW only */
> +		"A45_B8_C8", .size = 256,
> +		.a_off = 12,  .n_a = 45,
> +		.b_off = 192, .n_b = 8,
> +		.c_off = 224, .n_c = 8,
> +		.max_gen = 7 },
> +	[I915_OA_FORMAT_B4_C8] = { /* HSW only */
> +		"B4_C8", .size = 64,
> +		.b_off = 16, .n_b = 4,
> +		.c_off = 32, .n_c = 8,
> +		.max_gen = 7 },
> +	[I915_OA_FORMAT_B4_C8_A16] = { /* HSW only */
> +		"B4_C8_A16", .size = 128,
> +		.b_off = 16, .n_b = 4,
> +		.c_off = 32, .n_c = 8,
> +		.a_off = 60, .n_a = 16, .first_a = 29,
> +		.max_gen = 7 },
> +	[I915_OA_FORMAT_C4_B8] = { /* HSW+ (header differs from HSW-Gen8+) */
> +		"C4_B8", .size = 64,
> +		.c_off = 16, .n_c = 4,
> +		.b_off = 28, .n_b = 8 },
> +
> +	/* Gen8+ */
> +
> +	[local_I915_OA_FORMAT_A12] = {
> +		"A12", .size = 64,
> +		.a_off = 12, .n_a = 12, .first_a = 7,
> +		.min_gen = 8 },
> +	[local_I915_OA_FORMAT_A12_B8_C8] = {
> +		"A12_B8_C8", .size = 128,
> +		.a_off = 12, .n_a = 12,
> +		.b_off = 64, .n_b = 8,
> +		.c_off = 96, .n_c = 8, .first_a = 7,
> +		.min_gen = 8 },
> +	[local_I915_OA_FORMAT_A32u40_A4u32_B8_C8] = {
> +		"A32u40_A4u32_B8_C8", .size = 256,
> +		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
> +		.a_off = 144, .n_a = 4, .first_a = 32,
> +		.b_off = 192, .n_b = 8,
> +		.c_off = 224, .n_c = 8,
> +		.min_gen = 8 },
> +	[I915_OA_FORMAT_C4_B8] = {
> +		"C4_B8", .size = 64,
> +		.c_off = 16, .n_c = 4,
> +		.b_off = 32, .n_b = 8,
> +		.min_gen = 8 },
> +};
> +
> +static int drm_fd = -1;
> +static uint32_t devid;
> +static int card = -1;
> +
> +static uint64_t test_metric_set_id = UINT64_MAX;
> +
> +static uint64_t timestamp_frequency = 12500000;

Please avoid magic numbers; we should probably define named constants for these timestamp frequencies somewhere.

> +static enum drm_i915_oa_format test_oa_format;
> +static uint64_t oa_exp_1_millisec;
> +
> +static igt_render_copyfunc_t render_copy = NULL;
> +
> +static uint64_t
> +timebase_scale(uint32_t u32_delta)
> +{
> +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency;
> +}
> +
> +/* Returns: the largest OA exponent that will still result in a sampling period
> + * less than or equal to the given @period.
> + */
> +static int
> +max_oa_exponent_for_period_lte(uint64_t period)
> +{
> +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> +	 * would already represent a period of ~3 minutes so there's
> +	 * really no need to consider higher exponents.
> +	 */
> +	for (int i = 0; i < 30; i++) {
> +		uint64_t oa_period = timebase_scale(2 << i);
> +
> +		if (oa_period > period)
> +			return max(0, i - 1);
> +	}
> +
> +	igt_assert(!"reached");
> +	return -1;
> +}
> +
> +static bool
> +try_read_u64_file(const char *file, uint64_t *val)
> +{
> +	char buf[32];
> +	int fd, n;
> +
> +	fd = open(file, O_RDONLY);
> +	if (fd < 0)
> +		return false;
> +
> +	while ((n = read(fd, buf, sizeof(buf) - 1)) < 0 && errno == EINTR)
> +		;
> +	igt_assert(n >= 0);
> +
> +	close(fd);
> +
> +	buf[n] = '\0';
> +	*val = strtoull(buf, NULL, 0);
> +
> +	return true;
> +}
> +
> +static void
> +write_u64_file(const char *file, uint64_t val)
> +{
> +	char buf[32];
> +	int fd, len, ret;
> +
> +	fd = open(file, O_WRONLY);
> +	igt_assert(fd >= 0);
> +
> +	len = snprintf(buf, sizeof(buf), "%"PRIu64, val);
> +	igt_assert(len > 0);
> +
> +	while ((ret = write(fd, buf, len)) < 0 && errno == EINTR)
> +		;
> +	igt_assert_eq(ret, len);
> +
> +	close(fd);
> +}
> +

The read/write helpers (try_read_u64_file() and write_u64_file()) are each used only once, so maybe these functions are redundant?
Or, if we do want to keep these helpers, we could move them into the libs.

> +static bool
> +init_sys_info(void)
> +{
> +	const char *test_set_name = NULL;
> +	const char *test_set_uuid = NULL;
> +	char buf[256];
> +
> +	igt_assert_neq(card, -1);
> +	igt_assert_neq(devid, 0);
> +
> +	timestamp_frequency = 12500000;

The same here: 12500000 is a magic number.

> +
> +	if (IS_HASWELL(devid)) {
> +		/* We don't have a TestOa metric set for Haswell so use
> +		 * RenderBasic
> +		 */
> +		test_set_name = "RenderBasic";
> +		test_set_uuid = "403d8832-1a27-4aa6-a64e-f5389ce7b212";
> +		test_oa_format = I915_OA_FORMAT_A45_B8_C8;
> +	} else {
> +		test_set_name = "TestOa";
> +		test_oa_format = local_I915_OA_FORMAT_A32u40_A4u32_B8_C8;
> +
> +		if (IS_BROADWELL(devid)) {
> +			test_set_uuid = "d6de6f55-e526-4f79-a6a6-d7315c09044e";
> +		} else if (IS_CHERRYVIEW(devid)) {
> +			test_set_uuid = "4a534b07-cba3-414d-8d60-874830e883aa";
> +		} else if (IS_SKYLAKE(devid)) {
> +			switch (intel_gt(devid)) {
> +			case 1:
> +				test_set_uuid =
> +					"1651949f-0ac0-4cb1-a06f-dafd74a407d1";
> +				break;
> +			case 2:
> +				test_set_uuid =
> +					"2b985803-d3c9-4629-8a4f-634bfecba0e8";
> +				break;
> +			case 3:
> +				test_set_uuid =
> +					"882fa433-1f4a-4a67-a962-c741888fe5f5";
> +				break;
> +			default:
> +				igt_debug("unsupported Skylake GT size\n");
> +				return false;
> +			}
> +			timestamp_frequency = 12000000;

And here as well (12000000 is another magic number) :)

> +		} else if (IS_BROXTON(devid)) {
> +			test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612";
> +			timestamp_frequency = 19200000;
> +		} else if (IS_KABYLAKE(devid)) {
> +			switch (intel_gt(devid)) {
> +			case 1:
> +				test_set_uuid =
> +					"baa3c7e4-52b6-4b85-801e-465a94b746dd";
> +				break;
> +			case 2:
> +				test_set_uuid =
> +					"f1792f32-6db2-4b50-b4b2-557128f1688d";
> +				break;
> +			default:
> +				igt_debug("unsupported Kabylake GT size\n");
> +				return false;
> +			}
> +			timestamp_frequency = 12000000;
> +		} else if (IS_GEMINILAKE(devid)) {
> +			test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf";
> +			timestamp_frequency = 19200000;
> +		} else {
> +			igt_debug("unsupported GT\n");
> +			return false;
> +		}
> +	}
> +
> +	igt_debug("%s metric set UUID = %s\n",
> +		  test_set_name,
> +		  test_set_uuid);
> +
> +	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> +
> +	snprintf(buf, sizeof(buf),
> +		 "/sys/class/drm/card%d/metrics/%s/id",
> +		 card,
> +		 test_set_uuid);
> +
> +	return try_read_u64_file(buf, &test_metric_set_id);
> +}
> +
> +static int
> +__perf_open(int fd, struct drm_i915_perf_open_param *param)
> +{
> +	int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> +
> +	igt_assert(ret >= 0);
> +	errno = 0;
> +
> +	return ret;
> +}
> +
> +static void
> +test_cs_oa_stream_create(void)
> +{
> +	igt_fork(child, 1) {
> +		uint64_t properties[] = {
> +			/* Include OA reports in samples */
> +			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +
> +			/* OA unit configuration */
> +			DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
> +			DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
> +			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
> +
> +			/* CS parameters */
> +			local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true,
> +			local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_BSD,
> +		};
> +		struct drm_i915_perf_open_param param = {
> +			.flags = I915_PERF_FLAG_FD_CLOEXEC |
> +				I915_PERF_FLAG_FD_NONBLOCK,
> +			.num_properties = sizeof(properties) / 16,
> +			.properties_ptr = to_user_pointer(properties),
> +		};
> +		int stream_fd;
> +
> +		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
> +
> +		/* Send Render Engine as PROP_ENGINE */
> +		properties[ARRAY_SIZE(properties)-1] = I915_EXEC_RENDER;
> +
> +		stream_fd = __perf_open(drm_fd, &param);
> +		close(stream_fd);
> +	}
> +
> +	igt_waitchildren();
> +}
> +
> +static void
> +scratch_buf_init(drm_intel_bufmgr *bufmgr,
> +		 struct igt_buf *buf,
> +		 int width, int height,
> +		 uint32_t color)
> +{
> +	size_t stride = width * 4;
> +	size_t size = stride * height;
> +	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
> +	int ret;
> +
> +	ret = drm_intel_bo_map(bo, true /* writable */);
> +	igt_assert_eq(ret, 0);
> +
> +	for (int i = 0; i < width * height; i++)
> +		((uint32_t *)bo->virtual)[i] = color;
> +
> +	drm_intel_bo_unmap(bo);
> +
> +	buf->bo = bo;
> +	buf->stride = stride;
> +	buf->tiling = I915_TILING_NONE;
> +	buf->size = size;
> +}
> +
> +/*
> + * Given a set of CS properties including DRM_I915_PERF_PROP_SAMPLE_OA
> + * this function returns the offset in the sample where OA report will
> + * be located.
> + */
> +static size_t
> +get_oa_report_offset(uint64_t *properties, int prop_size)
> +{
> +	size_t offset = 0;
> +	int i = 0;
> +
> +	do {
> +		switch (properties[i]) {
> +		case local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE:
> +			if (properties[i+1]) {
> +				offset += 8;
> +				i += 2;
> +			}
> +			break;
> +		case local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID:
> +			if (properties[i+1]) {
> +				offset += 8;
> +				i += 2;
> +			}
> +			break;
> +		case local_DRM_I915_PERF_PROP_SAMPLE_PID:
> +			if (properties[i+1]) {
> +				offset += 8;
> +				i += 2;
> +			}
> +			break;
> +		case local_DRM_I915_PERF_PROP_SAMPLE_TAG:
> +			if (properties[i+1]) {
> +				offset += 8;
> +				i += 2;
> +			}
> +			break;
> +		case local_DRM_I915_PERF_PROP_SAMPLE_TS:
> +			if (properties[i+1]) {
> +				offset += 8;
> +				i += 2;
> +			}
> +			break;

Why are you doing the same thing for each case separately? The five cases above have identical bodies, so they could fall through to a single shared body.

> +		case DRM_I915_PERF_PROP_CTX_HANDLE:
> +		case DRM_I915_PERF_PROP_SAMPLE_OA:
> +		case DRM_I915_PERF_PROP_OA_METRICS_SET:
> +		case DRM_I915_PERF_PROP_OA_FORMAT:
> +		case DRM_I915_PERF_PROP_OA_EXPONENT:
> +		case local_DRM_I915_PERF_PROP_ENGINE:
> +			i += 2;
> +			break;
> +		}
> +
> +		if (properties[i] == local_DRM_I915_PERF_PROP_SAMPLE_MMIO) {
> +			uint32_t num_mmio = *((uint32_t *)properties[i+1]);
> +
> +			offset += (num_mmio * 4);
> +			i += 2;
> +		}
> +	} while (i < prop_size);
> +
> +	return offset;
> +}
> +
> +static size_t
> +get_perf_report_size(uint64_t *properties, int prop_size, int format_id)
> +{
> +	size_t format_size = oa_formats[format_id].size;
> +	size_t sample_size = 0;
> +
> +	sample_size += get_oa_report_offset(properties, prop_size);
> +	sample_size += format_size;
> +
> +	return sample_size;
> +}
> +
> +static bool
> +read_perf_reports(int stream_fd,
> +		  uint8_t *perf_reports,
> +		  int num_reports,
> +		  size_t report_size,
> +		  bool retry_on_loss)
> +{
> +	size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
> +			      report_size);
> +	const struct drm_i915_perf_record_header *header;
> +	uint8_t *base_perf_reports = perf_reports;
> +	int i = 0;
> +
> +	/* Note: we allocate a large buffer so that each read() iteration
> +	 * should scrape *all* pending records.
> +	 *
> +	 * The largest buffer the OA unit supports is 16MB and the smallest
> +	 * perf report format is 64bytes + 8bytes allowing up to 233016
> +	 * reports to be buffered.
> +	 *
> +	 * Being sure we are fetching all buffered reports allows us to
> +	 * potentially throw away / skip all reports whenever we see
> +	 * a _REPORT_LOST notification as a way of being sure our
> +	 * measurements aren't skewed by a lost report.
> +	 *
> +	 * Note: this is useful for some tests but also not something
> +	 * applications would be expected to resort to. Lost reports are
> +	 * somewhat unpredictable but typically don't pose a problem - except
> +	 * to indicate that the OA unit may be over taxed if lots of reports
> +	 * are being lost.
> +	 */
> +	int buf_size = 233016 *
> +		       (72 + sizeof(struct drm_i915_perf_record_header));
> +	uint8_t *buf = malloc(buf_size);
> +
> +	igt_assert(buf);
> +
> +	do {
> +		ssize_t len;
> +
> +		while ((len = read(stream_fd, buf, buf_size)) < 0 &&
> +		       errno == EINTR)
> +			;
> +
> +		igt_assert(len > 0);
> +
> +		for (size_t offset = 0; offset < len; offset += header->size) {
> +			const uint8_t *report;
> +			size_t sample_offset = 0;
> +
> +			header = (void *)(buf + offset);
> +
> +			igt_assert_eq(header->pad, 0); /* Reserved */
> +
> +			/* Currently the only test that should ever expect to
> +			 * see a _BUFFER_LOST error is the buffer_fill test,
> +			 * otherwise something bad has probably happened...
> +			 */
> +			igt_assert_neq(header->type,
> +				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> +
> +			/* At high sampling frequencies the OA HW might not be
> +			 * able to cope with all write requests and will notify
> +			 * us that a report was lost. We restart our read of
> +			 * two sequential reports due to the timeline blip this
> +			 * implies
> +			 */
> +			if (header->type ==
> +			    DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
> +				igt_debug("read restart: OA trigger collision "
> +					  "/ report lost\n");
> +				if (!retry_on_loss) {
> +					igt_debug("Freeing memory\n");
> +					free(buf);
> +					return false;
> +				}
> +				i = 0;
> +				perf_reports = base_perf_reports;
> +
> +				/* XXX: break, because we don't know where
> +				 * within the series of already read reports
> +				 * there could be a blip from the lost report.
> +				 */
> +				break;
> +			}
> +
> +			/* Currently the only other record type expected is a
> +			 * _SAMPLE. Notably this test will need updating if
> +			 * i915-perf is extended in the future with additional
> +			 * record types.
> +			 */
> +			igt_assert_eq(header->type,
> +				      DRM_I915_PERF_RECORD_SAMPLE);
> +
> +			igt_assert_eq(header->size, sample_size);
> +
> +			sample_offset = offset +
> +				sizeof(struct drm_i915_perf_record_header);
> +			report = (const uint8_t *)(buf + sample_offset);
> +
> +			memcpy(perf_reports, report, report_size);
> +			perf_reports += report_size;
> +			i++;
> +			if (i == num_reports)
> +				break;
> +		}
> +	} while (i < num_reports);
> +
> +	free(buf);
> +	return true;
> +}
> +
> +static void
> +perf_stream_capture_workload_samples(struct drm_i915_perf_open_param *param,
> +				     uint8_t *perf_reports,
> +				     int num_reports, int report_size)
> +{
> +	drm_intel_bufmgr *bufmgr;
> +	drm_intel_context *context0;
> +	struct intel_batchbuffer *batch;
> +	struct igt_buf src, dst;
> +	int width = 800;
> +	int height = 600;
> +	uint32_t ctx_id = 0xffffffff; /* invalid id */
> +	int stream_fd;
> +	int ret;
> +	bool valid_data = false;
> +
> +retry:
> +	bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
> +	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
> +
> +	scratch_buf_init(bufmgr, &src, width, height, 0xff0000ff);
> +	scratch_buf_init(bufmgr, &dst, width, height, 0x00ff00ff);
> +
> +	batch = intel_batchbuffer_alloc(bufmgr, devid);
> +
> +	context0 = drm_intel_gem_context_create(bufmgr);
> +	igt_assert(context0);
> +
> +	ret = drm_intel_gem_context_get_id(context0, &ctx_id);
> +	igt_assert_eq(ret, 0);
> +	igt_assert_neq(ctx_id, 0xffffffff);
> +
> +	igt_debug("opening i915-perf stream\n");
> +	stream_fd = __perf_open(drm_fd, param);
> +
> +	render_copy(batch,
> +		    context0,
> +		    &src, 0, 0, width, height,
> +		    &dst, 0, 0);
> +
> +	intel_batchbuffer_flush_with_context(batch, context0);
> +
> +	drm_intel_bo_unreference(src.bo);
> +	drm_intel_bo_unreference(dst.bo);
> +
> +	intel_batchbuffer_free(batch);
> +	drm_intel_gem_context_destroy(context0);
> +	drm_intel_bufmgr_destroy(bufmgr);
> +
> +	valid_data = read_perf_reports(stream_fd, perf_reports,
> +				       num_reports, report_size,
> +				       false);
> +	if (!valid_data) {
> +		close(stream_fd);
> +		goto retry;
> +	}
> +}
> +
> +struct oa_source_sample {
> +	uint64_t source;
> +	uint64_t ctx_id;
> +	uint8_t oa_report[];
> +};
> +
> +#define SOURCE(i)	(i == 0) ? "OABUFFER" : "CS"
> +
> +static void
> +verify_source(uint8_t *perf_reports, int num_reports, size_t report_size)
> +{
> +	struct oa_source_sample *sample;
> +	uint32_t *oa_report;
> +
> +	for (int i = 0; i < num_reports; i++) {
> +		size_t offset = i * report_size;
> +
> +		sample = (struct oa_source_sample *) (perf_reports + offset);
> +		oa_report = (uint32_t *) sample->oa_report;
> +
> +		igt_debug("read report: source= %s, reason = %x, "
> +			  "timestamp = %x\n",
> +			  SOURCE(sample->source), oa_report[0], oa_report[1]);
> +
> +		igt_assert((sample->source ==
> +			    local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER) ||
> +			   (sample->source ==
> +			    local_I915_PERF_SAMPLE_OA_SOURCE_CS));
> +
> +		if (sample->source == local_I915_PERF_SAMPLE_OA_SOURCE_CS)
> +			igt_assert(!oa_report[0]);
> +
> +		/* Don't expect zero for timestamps */
> +		igt_assert_neq(oa_report[1], 0);
> +	}
> +}
> +
> +static void
> +test_oa_source(void)
> +{
> +	uint64_t properties[] = {
> +		/* Include OA reports in samples */
> +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +
> +		/* OA unit configuration */
> +		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
> +		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
> +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
> +
> +		/* CS parameters */
> +		local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_RENDER,
> +		local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true,
> +		local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true,
> +	};
> +	struct drm_i915_perf_open_param param = {
> +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> +		.num_properties = sizeof(properties) / 16,
> +		.properties_ptr = to_user_pointer(properties),
> +	};
> +
> +	/* should be default, but just to be sure... */
> +	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
> +
> +	igt_fork(child, 1) {
> +		int prop_size = ARRAY_SIZE(properties);
> +		int num_reports = 10;
> +		int report_size = get_perf_report_size(properties, prop_size,
> +						       test_oa_format);
> +		int total_size = num_reports * report_size;
> +		uint8_t *perf_reports = malloc(total_size);
> +
> +		igt_assert(perf_reports);
> +
> +		perf_stream_capture_workload_samples(&param, perf_reports,
> +						     num_reports, report_size);
> +		verify_source(perf_reports, num_reports, report_size);
> +		free(perf_reports);
> +	}
> +
> +	igt_waitchildren();
> +}
> +
> +igt_main
> +{
> +	igt_skip_on_simulation();
> +
> +	igt_fixture {
> +		drm_fd = drm_open_driver_render(DRIVER_INTEL);
> +		devid = intel_get_drm_devid(drm_fd);
> +		card = drm_get_card();
> +
> +		igt_require(init_sys_info());
> +
> +		render_copy = igt_get_render_copyfunc(devid);
> +		igt_require_f(render_copy, "no render-copy function\n");
> +	}
> +
> +	igt_subtest("cs-oa-stream-create")
> +		test_cs_oa_stream_create();
> +
> +	igt_subtest("oa-source")
> +		test_oa_source();
> +
> +	igt_fixture {
> +		close(drm_fd);
> +	}
> +}
> -- 
> 1.9.1
> 
--
Cheers,
Ewelina

> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list