[Intel-gfx] [PATCH i-g-t 1/1] igt/dapc: Test Driver Assisted Performance Capture (DAPC)

Lionel Landwerlin lionel.g.landwerlin at intel.com
Tue Aug 29 08:51:59 UTC 2017


Hi Sagar,

Thanks for writing this test. It looks promising but there are a few 
issues that need to be addressed for this to run in CI.
Please have a look at the comments below.

Thanks!

On 28/08/17 10:53, Sagar Arun Kamble wrote:
> This test verifies different i915 perf sampling options for fields like
> PID, CTX ID, Timestamp, OA Report, TAG, MMIO.
>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
>   tests/Makefile.sources |    1 +
>   tests/dapc.c           | 1017 ++++++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 1018 insertions(+)
>   create mode 100644 tests/dapc.c
>
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index bb013c7..61feb0d 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -26,6 +26,7 @@ TESTS_progs = \
>   	core_getversion \
>   	core_prop_blob \
>   	core_setmaster_vs_auth \
> +	dapc \
>   	debugfs_test \
>   	drm_import_export \
>   	drm_mm \
> diff --git a/tests/dapc.c b/tests/dapc.c
> new file mode 100644
> index 0000000..f49b1cd
> --- /dev/null
> +++ b/tests/dapc.c
> @@ -0,0 +1,1017 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * dapc: Driver Assisted Performance Capture
> + *	 This tests the i915 perf functionality to sample various metrics by
> + *	 associating with the CS stream or just standalone periodic OA samples.
> + *	 Verifies fields like PID, CTX ID, Timestamp, OA Report, MMIO, Tags are
> + *	 generated properly for each sample.
> + *
> + * Authors:
> + *   Sourab Gupta <sourab.gupta at intel.com>
> + *   Sagar Arun Kamble <sagar.a.kamble at intel.com>
> + *
> + */
> +#define _GNU_SOURCE
> +#include "xf86drm.h"
> +#include "i915_drm.h"
> +#include "igt_core.h"
> +#include <linux/perf_event.h>
> +#include <asm/unistd.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +#include <sys/ioctl.h>
> +#include <dirent.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <assert.h>
> +#include <time.h>
> +

To be able to run this test in the continuous integration system, we 
need it to be autonomous.
The following macro requires user interaction. Unfortunately that won't 
work.
Please look at the other tests to create subtests and make sure we can 
run this in the CI.
Thanks!

> +#define COLLECT_DATA { \
> +	printf("(%s) Collecting data. ", __func__); \
> +	printf("Press enter to continue...\n"); \
> +	getc(stdin); \
> +}
> +

It would be good to test stream configurations with different sizes.
For example only Pid, or Tag & Pid or SourceInfo & ctx ID & Tag, etc...
And verify that we get reports with appropriate sizes.

> +#define OA_SAMPLE_SIZE_MAX	(8 +	/* drm_i915_perf_record_header */ \
> +				 8 +	/* source info */ \
> +				 8 +	/* ctx ID */ \
> +				 8 +	/* Pid */ \
> +				 8 +	/* Tag */ \
> +				 256) /* raw OA counter snapshot */
> +
> +#define TS_SAMPLE_SIZE_MAX	(8 +	/* drm_i915_perf_record_header */ \
> +				 8 +	/* ctx ID */ \
> +				 8 +	/* Pid */ \
> +				 8 +	/* Tag */ \
> +				 8)	/* Timestamp */ \
> +
> +#define TS_MMIO_SAMPLE_SIZE_MAX	(8 +   /* drm_i915_perf_record_header */ \
> +				 8 +   /* ctx ID */ \
> +				 8 +   /* Pid */ \
> +				 8 +   /* Tag */ \
> +				 8 +   /* Timestamp */ \
> +				 4*I915_PERF_MMIO_NUM_MAX)	/* MMIO reg */
> +
> +#define OA_TS_MMIO_SAMPLE_SIZE_MAX (8 +   /* drm_i915_perf_record_header */ \
> +				    8 +   /* source info */ \
> +				    8 +   /* ctx ID */ \
> +				    8 +   /* Pid */ \
> +				    8 +   /* Tag */ \
> +				    8 +   /* Timestamp */ \
> +				    (4*I915_PERF_MMIO_NUM_MAX) + /* MMIO reg*/ \
> +				    256) /* raw OA counter snapshot */
> +
> +#define READ_OA_BUF_SIZE_MAX		(100*OA_SAMPLE_SIZE_MAX)
> +#define READ_TS_BUF_SIZE_MAX		(100*TS_SAMPLE_SIZE_MAX)
> +#define READ_TS_MMIO_BUF_SIZE_MAX	(100*TS_MMIO_SAMPLE_SIZE_MAX)
> +#define READ_OA_TS_MMIO_BUF_SIZE_MAX	(100*OA_TS_MMIO_SAMPLE_SIZE_MAX)
> +
> +#define SAMPLE_OA	(1<<0)
> +#define SAMPLE_TS	(1<<1)
> +#define SAMPLE_MMIO	(1<<2)
> +
> +struct intel_device {
> +	uint32_t device;
> +	uint32_t subsystem_device;
> +	uint32_t subsystem_vendor;
> +};
> +
> +enum platform {
> +	ARCH_HSW,
> +	ARCH_BDW,
> +	ARCH_SKL,
> +} arch;
> +
> +/* DAPC OA samples read() from i915 perf */
> +struct dapc_oa_sample {
> +	struct drm_i915_perf_record_header header;
> +	uint64_t source_info;
> +	uint64_t ctx_id;
> +	uint64_t pid;
> +	uint64_t tag;
> +	uint8_t oa_report[];
> +};
> +
> +/* DAPC timestamp samples read() from i915 perf */
> +struct dapc_ts_sample {
> +	struct drm_i915_perf_record_header header;
> +	uint64_t ctx_id;
> +	uint64_t pid;
> +	uint64_t tag;
> +	uint64_t timestamp;
> +};
> +
> +/* DAPC timestamp + mmio samples read() from i915 perf */
> +struct dapc_ts_mmio_sample {
> +	struct drm_i915_perf_record_header header;
> +	uint64_t ctx_id;
> +	uint64_t pid;
> +	uint64_t tag;
> +	uint64_t timestamp;
> +	uint32_t mmio[2];
> +};
> +
> +/* DAPC OA + timestamp + mmio samples read() from i915 perf */
> +struct dapc_oa_ts_mmio_sample {
> +	struct drm_i915_perf_record_header header;
> +	uint64_t source_info;
> +	uint64_t ctx_id;
> +	uint64_t pid;
> +	uint64_t tag;
> +	uint64_t timestamp;
> +
> +	/*
> +	 * Hardcoding 2 here since the array size would depend on no. of mmio
> +	 * values queried. TODO: Find a better way to do this.
> +	 */
> +	uint32_t mmio[2];
> +	uint8_t oa_report[];
> +};
> +
> +struct i915_oa_format {
> +	int format;
> +	int size;
> +};
> +
> +static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
> +	[I915_OA_FORMAT_A13]	    = { 0, 64 },
> +	[I915_OA_FORMAT_A29]	    = { 1, 128 },
> +	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
> +	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
> +	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
> +	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
> +	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
> +	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
> +};
> +
> +static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
> +	[I915_OA_FORMAT_A12]		    = { 0, 64 },
> +	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
> +	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> +	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
> +};
> +

I'm not sure it's worth testing all the configs, maybe just limit the 
test to TestOa.
You can pick up the uuids from perf.c.

> +static const char * const hsw_guids[] = {
> +	"403d8832-1a27-4aa6-a64e-f5389ce7b212",
> +	"39ad14bc-2380-45c4-91eb-fbcb3aa7ae7b",
> +	"3865be28-6982-49fe-9494-e4d1b4795413",
> +	"bb5ed49b-2497-4095-94f6-26ba294db88a",
> +	"3358d639-9b5f-45ab-976d-9b08cbfc6240",
> +	"bc274488-b4b6-40c7-90da-b77d7ad16189",
> +};
> +
> +/*
> + * Need to update GUID based on latest i915 configuration. Currently
> + * first GUID is being tested.
> + */
> +static const char * const skl_guids[] = {
> +	"1651949f-0ac0-4cb1-a06f-dafd74a407d1",
> +	"f519e481-24d2-4d42-87c9-3fdd12c00202",
> +	"fdfc01cc-e28e-423a-aae0-b5ed5d4d7a9f",
> +	"c9c7ace5-614a-4f8e-90c7-30064c36cad2",
> +	"99797dc2-b48f-4d83-b973-613cff01202b",
> +	"afa148ea-77fb-48ee-b8f8-e5e971ecf589",
> +	"bfce7061-e6f1-4a78-bed8-c9cc69af70f9",
> +	"c35ddcab-b1f2-452f-969a-a8209d531a00",
> +	"2b0d0c83-706a-4cb6-b55e-d6bcf51fa6d3",
> +	"d084f6a9-f706-4b74-b98c-65daa5340517",
> +	"c7ed493c-54ff-4152-baf4-07e31e7a24cb",
> +	"43ad9300-198a-4734-8f3a-2a2151b9dab6",
> +	"ccfce3f2-6c63-4630-a043-f2a0243fed8f",
> +	"2e564b28-98fa-42a0-8bbc-7915de3cc03c",
> +	"a305533f-7e36-4fb6-8749-c6280bce3457",
> +	"34ecd59f-6b52-4004-916f-afe9530a0442",
> +	"ee1990d9-6e93-4c7c-aa9e-b40e1ec4d41b",
> +};
> +
> +static struct intel_device intel_dev;
> +static int drm_fd = -1;
> +static int drm_card = -1;
> +static int perf_event_fd_rcs = -1;
> +
> +static uint64_t read_file_uint64(const char *file)
> +{
> +	char buf[32];
> +	int fd, n;
> +
> +	fd = open(file, 0);
> +	if (fd < 0)
> +		return 0;
> +	n = read(fd, buf, sizeof(buf) - 1);
> +	close(fd);
> +	if (n < 0)
> +		return 0;
> +
> +	buf[n] = '\0';
> +	return strtoull(buf, 0, 0);
> +}
> +
> +static uint32_t read_device_param(int id, const char *param)
> +{
> +	char *name;
> +	int ret = asprintf(&name, "/sys/class/drm/renderD%u/device/%s",
> +			   id, param);
> +	uint32_t value;
> +
> +	assert(ret != -1);
> +
> +	value = read_file_uint64(name);
> +	free(name);
> +
> +	return value;
> +}
> +
> +static int get_card_for_fd(int fd)
> +{
> +	struct stat sb;
> +	int mjr, mnr;
> +	char buffer[128];
> +	DIR *drm_dir;
> +	int entry_size;
> +	struct dirent *entry1, *entry2;
> +	int name_max;
> +
> +	if (fstat(fd, &sb)) {
> +		printf("Failed to stat DRM fd\n");
> +		return -1;
> +	}
> +
> +	mjr = major(sb.st_rdev);
> +	mnr = minor(sb.st_rdev);
> +
> +	snprintf(buffer, sizeof(buffer), "/sys/dev/char/%d:%d/device/drm",
> +		 mjr, mnr);
> +
> +	drm_dir = opendir(buffer);
> +	assert(drm_dir != NULL);
> +
> +	name_max = pathconf(buffer, _PC_NAME_MAX);
> +
> +	if (name_max == -1)
> +		name_max = 255;
> +
> +	entry_size = 256;
> +	entry1 = alloca(entry_size);
> +
> +	while ((readdir_r(drm_dir, entry1, &entry2) == 0) && entry2 != NULL)
> +		if (entry2->d_type == DT_DIR &&
> +		    strncmp(entry2->d_name, "card", 4) == 0)
> +			return strtoull(entry2->d_name + 4, NULL, 10);
> +
> +	return -1;
> +}
> +
> +

You can replace the following function with __drm_open_driver_render.

> +static int open_render_node(struct intel_device *dev)
> +{
> +	char *name;
> +	int i, fd;
> +
> +	for (i = 128; i < (128 + 16); i++) {
> +		int ret;
> +
> +		ret = asprintf(&name, "/dev/dri/renderD%u", i);
> +		assert(ret != -1);
> +
> +		fd = open(name, O_RDWR);
> +		free(name);
> +
> +		if (fd == -1)
> +			continue;
> +
> +		if (read_device_param(i, "vendor") != 0x8086) {
> +			close(fd);
> +			fd = -1;
> +			continue;
> +		}
> +
> +		dev->device = read_device_param(i, "device");
> +		dev->subsystem_device = read_device_param(i,
> +							  "subsystem_device");
> +		dev->subsystem_vendor = read_device_param(i,
> +							  "subsystem_vendor");
> +
> +		return fd;
> +	}
> +
> +	return fd;
> +}

The following function can be replaced by igt_ioctl().

> +
> +/* Handle restarting ioctl if interrupted... */
> +static int perf_ioctl(int fd, unsigned long request, void *arg)
> +{
> +	int ret;
> +
> +	do {
> +		ret = ioctl(fd, request, arg);
> +	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
> +
> +	return ret;
> +}
> +
> +int read_perf_dapc_samples(uint8_t *temp_buf, uint8_t *out_data,
> +			   uint8_t sample_flags, int fd)
> +{
> +	int count, max_read_size = 16*1024*1024, size_copied = 0, offset = 0;
> +
> +	if (sample_flags & SAMPLE_OA)
> +		max_read_size = READ_OA_BUF_SIZE_MAX;
> +	else if (sample_flags & SAMPLE_TS)
> +		max_read_size = READ_TS_BUF_SIZE_MAX;
> +	else if (sample_flags & (SAMPLE_TS|SAMPLE_MMIO))
> +		max_read_size = READ_TS_MMIO_BUF_SIZE_MAX;
> +	else if (sample_flags & (SAMPLE_OA|SAMPLE_TS|SAMPLE_MMIO)) {
> +		max_read_size = READ_OA_TS_MMIO_BUF_SIZE_MAX;
> +	} else {
> +		printf("Unknown sample flags: %d\n", sample_flags);
> +		return -1;
> +	}
> +
> +	count = read(fd, temp_buf, max_read_size);
> +
> +	if (count < 0) {
> +		printf("Error reading i915 OA event stream. Errno:%d", errno);
> +		perror("Error : ");
> +		return count;
> +	}
> +
> +	if (count == 0)
> +		return 0;
> +
> +	while (offset < count) {
> +		struct drm_i915_perf_record_header *header =
> +		(struct drm_i915_perf_record_header *)(temp_buf + offset);
> +
> +		if (header->size == 0) {
> +			printf("Spurious header size == 0\n");
> +			/* XXX: How should we handle this instead of exiting()*/
> +			exit(1);
> +		}
> +
> +		offset += header->size;
> +
> +		switch (header->type) {
> +		case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
> +			printf("i915_oa: OA buffer overflow\n");
> +			break;
> +		case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
> +			printf("i915_oa: OA report lost\n");
> +			break;
> +		case DRM_I915_PERF_RECORD_SAMPLE:
> +			if (sample_flags & SAMPLE_OA) {
> +				struct dapc_oa_sample *sample =
> +					(struct dapc_oa_sample *)header;
> +
> +				if (sample->source_info ==
> +				    I915_PERF_SAMPLE_OA_SOURCE_RCS) {
> +					/* DAPC sample */
> +					printf("DAPC OA sample\n");
> +				} else {
> +					/* Periodic sample. No need to copy */
> +					printf("Periodic sample\n");
> +					continue;
> +				}
> +			}
> +			memcpy(out_data + size_copied, header, header->size);
> +			size_copied += header->size;
> +			break;
> +		default:
> +			printf("i915_oa: Spurious header type = %d\n",
> +			       header->type);
> +		}
> +	}
> +
> +	return size_copied;
> +}
> +
> +bool read_metrics_id_from_sysfs(int *metrics_id)
> +{
> +	char buffer[128];
> +	const char *guid;
> +
> +	assert(drm_card >= 0);
> +
> +	/*
> +	 * Select render basic metrics ID - i.e. first guid, from the arch
> +	 * specific guids.
> +	 */
> +	switch (arch) {
> +	case ARCH_HSW:
> +		guid = hsw_guids[0];
> +		break;

You defined ARCH_BDW, but it appears to be missing here.

> +	case ARCH_SKL:
> +		guid = skl_guids[0];
> +		break;
> +	default:
> +		printf("guid not found for the arch\n");
> +		return false;
> +	}
> +
> +	snprintf(buffer, sizeof(buffer),
> +		 "/sys/class/drm/card%d/metrics/%s/id",
> +		 drm_card, guid);
> +	*metrics_id = read_file_uint64(buffer);
> +
> +	return true;
> +}
> +
> +static void open_i915_rcs_oa_stream(int report_format, int metrics_id)
> +{
> +	int period_exponent = 16;//0;
> +	int ring_id = I915_EXEC_RENDER; /* RCS */
> +	struct drm_i915_perf_open_param param;
> +	uint64_t properties[] = {
> +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_id,
> +		DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> +		DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
> +		DRM_I915_PERF_PROP_ENGINE, ring_id,
> +		DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true,
> +		DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_PID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_TAG, true,
> +	};

This test will potentially run on older kernels, where the feature 
you're adding won't be available.
It needs to handle this case and just skip the tests if opening the 
stream with newer options isn't supported.

> +	int fd;
> +
> +	memset(&param, 0, sizeof(param));
> +
> +	param.flags = 0;
> +	param.flags |= I915_PERF_FLAG_FD_CLOEXEC;
> +	param.flags |= I915_PERF_FLAG_FD_NONBLOCK;
> +
> +	param.properties_ptr = (uint64_t)properties;
> +	param.num_properties = sizeof(properties) / 16;
> +
> +	fd = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
> +
> +	if (fd == -1) {
> +		perror("Error opening i915 perf event : ");
> +		return;
> +	}
> +
> +	printf("Opened i915 perf event.\n");
> +	perf_event_fd_rcs = fd;
> +}
> +
> +static void open_i915_rcs_ts_stream(void)
> +{
> +	struct drm_i915_perf_open_param param;
> +	int ring_id = I915_EXEC_RENDER; /* RCS */
> +	uint64_t properties[] = {
> +		DRM_I915_PERF_PROP_ENGINE, ring_id,
> +		DRM_I915_PERF_PROP_SAMPLE_TS, true,
> +		DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_PID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_TAG, true,
> +	};
> +	int fd;
> +
> +	memset(&param, 0, sizeof(param));
> +
> +	param.flags = 0;
> +	param.flags |= I915_PERF_FLAG_FD_CLOEXEC;
> +	param.flags |= I915_PERF_FLAG_FD_NONBLOCK;
> +
> +	param.properties_ptr = (uint64_t)properties;
> +	param.num_properties = sizeof(properties) / 16;
> +
> +	fd = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
> +
> +	if (fd == -1) {
> +		perror("Error opening i915 perf event : ");
> +		return;
> +	}
> +
> +	printf("Opened i915 perf event.\n");
> +	perf_event_fd_rcs = fd;
> +}
> +
> +static void open_i915_rcs_ts_mmio_stream(
> +			struct drm_i915_perf_mmio_list *mmio_list)
> +{
> +	struct drm_i915_perf_open_param param;
> +	int ring_id = I915_EXEC_RENDER; /* RCS */
> +	uint64_t properties[] = {
> +		DRM_I915_PERF_PROP_ENGINE, ring_id,
> +		DRM_I915_PERF_PROP_SAMPLE_TS, true,
> +		DRM_I915_PERF_PROP_SAMPLE_MMIO, (uint64_t)mmio_list,
> +		DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_PID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_TAG, true,
> +	};
> +	int fd;
> +
> +	memset(&param, 0, sizeof(param));
> +
> +	param.flags = 0;
> +	param.flags |= I915_PERF_FLAG_FD_CLOEXEC;
> +	param.flags |= I915_PERF_FLAG_FD_NONBLOCK;
> +
> +	param.properties_ptr = (uint64_t)properties;
> +	param.num_properties = sizeof(properties) / 16;
> +
> +	fd = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
> +
> +	if (fd == -1) {
> +		perror("Error opening i915 perf event : ");
> +		return;
> +	}
> +
> +	printf("Opened i915 perf event.\n");
> +	perf_event_fd_rcs = fd;
> +}
> +
> +static void open_i915_rcs_oa_ts_mmio_stream(int report_format, int metrics_id,
> +			struct drm_i915_perf_mmio_list *mmio_list)
> +{
> +	int period_exponent = 16;//0;
> +	int ring_id = I915_EXEC_RENDER; /* RCS */
> +	struct drm_i915_perf_open_param param;
> +	uint64_t properties[] = {
> +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_id,
> +		DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> +		DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
> +		DRM_I915_PERF_PROP_ENGINE, ring_id,
> +		DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true,
> +		DRM_I915_PERF_PROP_SAMPLE_TS, true,
> +		DRM_I915_PERF_PROP_SAMPLE_MMIO, (uint64_t)mmio_list,
> +		DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_PID, true,
> +		DRM_I915_PERF_PROP_SAMPLE_TAG, true,
> +	};
> +	int fd;
> +
> +	memset(&param, 0, sizeof(param));
> +
> +	param.flags = 0;
> +	param.flags |= I915_PERF_FLAG_FD_CLOEXEC;
> +	param.flags |= I915_PERF_FLAG_FD_NONBLOCK;
> +
> +	param.properties_ptr = (uint64_t)properties;
> +	param.num_properties = sizeof(properties) / 16;
> +
> +	fd = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
> +
> +	if (fd == -1) {
> +		perror("Error opening i915 perf event : ");
> +		return;
> +	}
> +
> +	printf("Opened i915 perf event.\n");
> +	perf_event_fd_rcs = fd;
> +}
> +
> +static void open_i915_periodic_oa_stream(int report_format, int metrics_id)
> +{
> +	int period_exponent = 16;//0;
> +	struct drm_i915_perf_open_param param;
> +	uint64_t properties[] = {
> +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_id,
> +		DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> +		DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
> +		DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true,
> +	};
> +	int fd;
> +
> +	memset(&param, 0, sizeof(param));
> +
> +	param.flags = 0;
> +	param.flags |= I915_PERF_FLAG_FD_CLOEXEC;
> +	param.flags |= I915_PERF_FLAG_FD_NONBLOCK;
> +
> +	param.properties_ptr = (uint64_t)properties;
> +	param.num_properties = sizeof(properties) / 16;
> +
> +	fd = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
> +
> +	if (fd == -1) {
> +		perror("Error opening i915 perf event : ");
> +		return;
> +	}
> +
> +	printf("Opened i915 perf event.\n");
> +	perf_event_fd_rcs = fd;
> +}
> +
> +static void close_i915_perf_stream(void)
> +{
> +	if (perf_event_fd_rcs != -1) {
> +		close(perf_event_fd_rcs);
> +		perf_event_fd_rcs = -1;
> +	}
> +}
> +
> +static void test_perf_dapc_rcs_oa(void)
> +{
> +	uint64_t report_format;
> +	int metrics_id;
> +	int size, report_size, current_size = 0;
> +	uint8_t *dapc_data, *temp_buffer, *current_hdr;
> +	int ret = 0;
> +
> +	if (arch == ARCH_HSW) {
> +		report_format = I915_OA_FORMAT_A29;
> +		report_size = hsw_oa_formats[report_format].size;
> +	} else {
> +		report_format = I915_OA_FORMAT_A12;
> +		report_size = gen8_plus_oa_formats[report_format].size;
> +	}
> +
> +	if (report_size < 0)
> +		return;
> +
> +	dapc_data = malloc(READ_OA_BUF_SIZE_MAX);
> +	temp_buffer = malloc(READ_OA_BUF_SIZE_MAX);
> +
> +	ret = read_metrics_id_from_sysfs(&metrics_id);
> +	if (!ret) {
> +		printf("Reading metrics id from sysfs failed\n");
> +		return;
> +	}
> +
> +	open_i915_rcs_oa_stream(report_format, metrics_id);
> +
> +	/* Collect samples */
> +	COLLECT_DATA;
> +
> +	/* Read samples */
> +	size = read_perf_dapc_samples(temp_buffer, dapc_data, SAMPLE_OA,
> +				      perf_event_fd_rcs);

What if size = -1?
This needs to be checked, in the other tests too.

> +
> +	current_hdr = dapc_data;
> +
> +	printf("size retrieved = %d\n", size);
> +	/* Verify the sanity of DAPC node headers */
> +	while (current_size < size) {
> +		struct dapc_oa_sample *sample = (struct dapc_oa_sample *)
> +					(current_hdr + current_size);
> +
> +		igt_assert(sample->pid != 0);

I'm not sure that's good enough.
There is a need to verify that the pid actually matches something we 
know is correct.

I would suggest spawning a child process that emits a rendercopy and 
verifying in the parent process that we get the number of written pixels in 
the A21 counter:

https://github.com/djdeath/intel-gpu-tools/blob/wip/djdeath/oa-next/tests/perf.c#L3593

It should verify that the reports containing the written pixels are 
tagged with the appropriate PID.

> +		printf("pid = %lu, ctx_id = %lu, OA(first 8 bytes):0x%llx, "
> +		       "oa ts=0x%x\n",
> +			sample->pid, sample->ctx_id,
> +			*(unsigned long long int *)sample->oa_report,
> +			*(uint32_t *)(sample->oa_report + 4));
> +
> +		current_size += sample->header.size;
> +	}
> +	close_i915_perf_stream();
> +
> +	free(dapc_data);
> +	free(temp_buffer);
> +}
> +
> +static void test_perf_dapc_rcs_ts(void)
> +{
> +
> +	uint64_t prev_ts = 0, init_ts = 0;
> +	int size, current_size = 0;
> +	uint8_t *dapc_data, *temp_buffer, *current_hdr;
> +
> +	dapc_data = malloc(READ_TS_BUF_SIZE_MAX);
> +	temp_buffer = malloc(READ_TS_BUF_SIZE_MAX);
> +
> +	memset(dapc_data, 0, READ_TS_BUF_SIZE_MAX);
> +	memset(temp_buffer, 0, READ_TS_BUF_SIZE_MAX);
> +
> +	open_i915_rcs_ts_stream();
> +
> +	/* Collect samples */
> +	COLLECT_DATA;
> +
> +	/* Read samples */
> +	size = read_perf_dapc_samples(temp_buffer, dapc_data, SAMPLE_TS,
> +				      perf_event_fd_rcs);
> +
> +	current_hdr = dapc_data;
> +
> +	printf("size collected = %d\n", size);
> +
> +	/* Verify the sanity of DAPC data */
> +	while (current_size < size) {
> +		struct dapc_ts_sample *sample = (struct dapc_ts_sample *)
> +						(current_hdr + current_size);
> +		uint64_t ts = sample->timestamp;
> +
> +		igt_assert(sample->pid != 0);
> +		printf("pid = %lu, ctx_id = %lu, tag=%lu, ts=0x%llx\n",
> +			sample->pid, sample->ctx_id, sample->tag,
> +			(unsigned long long)ts);
> +
> +		igt_assert(ts > init_ts);
> +		igt_assert(ts > prev_ts);
> +		prev_ts = ts;
> +
> +		current_size += sample->header.size;
> +	}
> +
> +	printf("total size read = %d\n", current_size);
> +	close_i915_perf_stream();
> +
> +	free(dapc_data);
> +	free(temp_buffer);
> +}
> +
> +
> +static void test_perf_dapc_rcs_ts_mmio(void)
> +{
> +
> +	uint64_t prev_ts = 0, init_ts = 0;
> +	int r, size, current_size = 0;
> +	uint8_t *dapc_data, *temp_buffer, *current_hdr;
> +	struct drm_i915_perf_mmio_list mmio;
> +
> +	dapc_data = malloc(READ_TS_MMIO_BUF_SIZE_MAX);
> +	temp_buffer = malloc(READ_TS_MMIO_BUF_SIZE_MAX);
> +
> +	memset(&mmio, 0, sizeof(mmio));
> +
> +#define GEN6_GT_GFX_RC6				0x138108

You can read the rc6 residency info from 
/sys/kernel/debug/dri/0/i915_drpc_info.
Maybe you could read it once there before opening the stream, then close 
the stream and read it once more.
Then verify that all the reports read fall within the bounds of the 2 reads.

> +#define GEN6_GT_GFX_RC6p			0x13810C
> +	mmio.mmio_list[0] = GEN6_GT_GFX_RC6;
> +	mmio.mmio_list[1] = GEN6_GT_GFX_RC6p;
> +	mmio.num_mmio = 2;
> +
> +	open_i915_rcs_ts_mmio_stream(&mmio);
> +
> +	/* Collect samples */
> +	COLLECT_DATA;
> +
> +	/* Read samples */
> +	size = read_perf_dapc_samples(temp_buffer, dapc_data,
> +				      SAMPLE_TS|SAMPLE_MMIO, perf_event_fd_rcs);
> +
> +	current_hdr = dapc_data;
> +
> +	printf("size collected = %d\n", size);
> +	/* Verify the sanity of DAPC data */
> +	while (current_size < size) {
> +		struct dapc_ts_mmio_sample *sample =
> +						(struct dapc_ts_mmio_sample *)
> +						(current_hdr + current_size);
> +		uint64_t ts = sample->timestamp;
> +
> +		igt_assert(sample->pid != 0);
> +		printf("pid = %lu, ctx_id = %lu, ts=0x%llx\n",
> +			sample->pid, sample->ctx_id,
> +			(unsigned long long)ts);
> +
> +		igt_assert(ts > init_ts);
> +		igt_assert(ts > prev_ts);
> +		prev_ts = ts;
> +
> +		for (r = 0; r < mmio.num_mmio; r++) {
> +			printf("mmio 0x%08X = 0x%08X\n",
> +			mmio.mmio_list[r],
> +			sample->mmio[r]);
> +		}
> +
> +		current_size += sample->header.size;
> +	}
> +
> +	printf("total size read = %d\n", current_size);
> +	close_i915_perf_stream();
> +
> +	free(dapc_data);
> +	free(temp_buffer);
> +}
> +
> +static void test_perf_dapc_rcs_oa_ts_mmio(void)
> +{
> +
> +	uint64_t report_format;
> +	uint64_t prev_ts = 0, init_ts = 0;
> +	int r, report_size, size, metrics_id, current_size = 0;
> +	uint8_t *dapc_data, *temp_buffer, *current_hdr;
> +	struct drm_i915_perf_mmio_list mmio;
> +	int ret = 0;
> +
> +	if (arch == ARCH_HSW) {
> +		report_format = I915_OA_FORMAT_A29;
> +		report_size = hsw_oa_formats[report_format].size;
> +	} else {
> +		report_format = I915_OA_FORMAT_A12;
> +		report_size = gen8_plus_oa_formats[report_format].size;
> +	}
> +
> +	if (report_size < 0)
> +		return;
> +
> +	dapc_data = malloc(READ_OA_TS_MMIO_BUF_SIZE_MAX);
> +	temp_buffer = malloc(READ_OA_TS_MMIO_BUF_SIZE_MAX);
> +
> +	memset(&mmio, 0, sizeof(mmio));
> +
> +#define GEN6_GT_GFX_RC6				0x138108
> +#define GEN6_GT_GFX_RC6p			0x13810C
> +	mmio.mmio_list[0] = GEN6_GT_GFX_RC6;
> +	mmio.mmio_list[1] = GEN6_GT_GFX_RC6p;
> +	mmio.num_mmio = 2;
> +
> +	ret = read_metrics_id_from_sysfs(&metrics_id);
> +	if (!ret) {
> +		printf("Reading metrics id from sysfs failed\n");
> +		return;
> +	}
> +
> +	open_i915_rcs_oa_ts_mmio_stream(report_format, metrics_id, &mmio);
> +
> +	/* Collect samples */
> +	COLLECT_DATA;
> +
> +	/* Read samples */
> +	size = read_perf_dapc_samples(temp_buffer, dapc_data,
> +				      SAMPLE_OA|SAMPLE_TS|SAMPLE_MMIO,
> +				      perf_event_fd_rcs);
> +
> +	current_hdr = dapc_data;
> +
> +	printf("size collected = %d\n", size);
> +	/* Verify the sanity of DAPC data */
> +	while (current_size < size) {
> +		struct dapc_oa_ts_mmio_sample *sample =
> +				(struct dapc_oa_ts_mmio_sample *)
> +						(current_hdr + current_size);
> +		uint64_t ts = sample->timestamp;
> +
> +		igt_assert(sample->pid != 0);
> +		printf("pid = %lu, ctx_id = %lu, ts=0x%llx\n",
> +			sample->pid, sample->ctx_id,
> +			(unsigned long long)ts);
> +
> +		igt_assert(ts > init_ts);
> +		igt_assert(ts > prev_ts);
> +		prev_ts = ts;
> +
> +		for (r = 0; r < mmio.num_mmio; r++) {
> +			printf("mmio 0x%08X = 0x%08X\n",
> +			mmio.mmio_list[r], sample->mmio[r]);
> +		}
> +
> +		current_size += sample->header.size;
> +		printf("current size = %d\n", current_size);
> +	}
> +
> +	printf("total size read = %d\n", current_size);
> +	close_i915_perf_stream();
> +
> +	free(dapc_data);
> +	free(temp_buffer);
> +}
> +
> +static void test_perf_dapc_periodic_oa(void)
> +{
> +	uint64_t report_format;
> +	int size, report_size, metrics_id;
> +	uint8_t *dapc_data, *temp_buffer;
> +	int ret = 0;
> +
> +	if (arch == ARCH_HSW) {
> +		report_format = I915_OA_FORMAT_A29;
> +		report_size = hsw_oa_formats[report_format].size;
> +	} else {
> +		report_format = I915_OA_FORMAT_A12;
> +		report_size = gen8_plus_oa_formats[report_format].size;
> +	}
> +
> +	if (report_size < 0)
> +		return;
> +
> +	dapc_data = malloc(READ_OA_BUF_SIZE_MAX);
> +	temp_buffer = malloc(READ_OA_BUF_SIZE_MAX);
> +
> +	ret = read_metrics_id_from_sysfs(&metrics_id);
> +	if (!ret) {
> +		printf("Reading metrics id from sysfs failed\n");
> +		return;
> +	}
> +
> +	open_i915_periodic_oa_stream(report_format, metrics_id);
> +
> +	/* Collect samples */
> +	COLLECT_DATA;
> +
> +	/* Read samples */
> +	size = read_perf_dapc_samples(temp_buffer, dapc_data, SAMPLE_OA,
> +				      perf_event_fd_rcs);

What are you testing here?

> +
> +	close_i915_perf_stream();
> +
> +	free(dapc_data);
> +	free(temp_buffer);
> +}
> +
> +static bool
> +initialize(void)
> +{
> +
> +	if (intel_dev.device)
> +		return true;
> +
> +	drm_fd = open_render_node(&intel_dev);
> +	if (drm_fd < 0) {
> +		printf("Failed to open render node\n");
> +		return false;
> +	}
> +
> +	drm_card = get_card_for_fd(drm_fd);
> +	if (drm_card < 0) {
> +		printf("Failed to get drm card info\n");
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	bool ret;
> +	int option;
> +	int platform;
> +
> +	if (argc != 3) {
> +		printf("Usage: \n./dapc <Platform> <Test_mode>\
> +		\nPlatform: 0-HSW, 1-BDW, 2-SKL\n\
> +		\nTest_mode:\n\
> +		\t0 - RCS OA mode\n\
> +		\t1 - RCS TS mode\n\
> +		\t2 - RCS TS+MMIO mode\n\
> +		\t3 - RCS OA+TS+MMIO mode\n\
> +		\t4 - Periodic OA mode\n");
> +		return 0;
> +	}
> +
> +	ret = initialize();
> +	if (!ret)
> +		return -1;
> +
> +	platform = atoi(argv[1]);
> +	switch (platform) {
> +	case 0:
> +		arch = ARCH_HSW;
> +		break;
> +	case 1:
> +		arch = ARCH_BDW;
> +		break;
> +	case 2:
> +		arch = ARCH_SKL;
> +		break;
> +	default:
> +		fprintf(stderr, "Invalid platform:%d\n", platform);
> +		return -1;
> +	}
> +
> +	option = atoi(argv[2]);
> +	switch (option) {
> +	case 0:
> +		test_perf_dapc_rcs_oa();
> +		break;
> +	case 1:
> +		test_perf_dapc_rcs_ts();
> +		break;
> +	case 2:
> +		test_perf_dapc_rcs_ts_mmio();
> +		break;
> +	case 3:
> +		test_perf_dapc_rcs_oa_ts_mmio();
> +		break;
> +	case 4:
> +		test_perf_dapc_periodic_oa();
> +		break;
> +	default:
> +		fprintf(stderr, "Invalid Option:%d\n", option);
> +		return -1;
> +	}
> +
> +	return 0;
> +}




More information about the Intel-gfx mailing list