[Intel-gfx] [RFC i-g-t 1/2] intel-gpu-overlay: Use new class/instance based PMU API

Petri Latvala petri.latvala at intel.com
Wed Aug 2 12:58:38 UTC 2017


This patch uses new uapi tokens; please add them as LOCAL_* definitions.


--
Petri Latvala


On Wed, Aug 02, 2017 at 01:33:11PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> Update the overlay so that it works with the i915 PMU RFC kernel patches.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>  overlay/gem-interrupts.c |  2 +-
>  overlay/gpu-freq.c       |  4 +--
>  overlay/gpu-top.c        | 51 +++++++++++++++++++++---------------
>  overlay/perf.h           | 68 +++++++++++++++++++++++++++++-------------------
>  overlay/power.c          |  2 +-
>  overlay/rc6.c            |  6 ++---
>  6 files changed, 78 insertions(+), 55 deletions(-)
> 
> diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
> index 0150a1d03825..f7c88d4822a5 100644
> --- a/overlay/gem-interrupts.c
> +++ b/overlay/gem-interrupts.c
> @@ -44,7 +44,7 @@ static int perf_open(void)
>  	attr.type = i915_type_id();
>  	if (attr.type == 0)
>  		return -ENOENT;
> -	attr.config = I915_PERF_INTERRUPTS;
> +	attr.config = I915_PMU_INTERRUPTS;
>  
>  	return perf_event_open(&attr, -1, 0, -1, 0);
>  }
> diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
> index 321c93882238..bfd79760a7c8 100644
> --- a/overlay/gpu-freq.c
> +++ b/overlay/gpu-freq.c
> @@ -54,8 +54,8 @@ static int perf_open(void)
>  {
>  	int fd;
>  
> -	fd = perf_i915_open(I915_PERF_ACTUAL_FREQUENCY, -1);
> -	if (perf_i915_open(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
> +	fd = perf_i915_open(I915_PMU_ACTUAL_FREQUENCY, -1);
> +	if (perf_i915_open(I915_PMU_REQUESTED_FREQUENCY, fd) < 0) {
>  		close(fd);
>  		fd = -1;
>  	}
> diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
> index 891a7ea7c0b1..fa3d8c5d4e3c 100644
> --- a/overlay/gpu-top.c
> +++ b/overlay/gpu-top.c
> @@ -42,11 +42,6 @@
>  #define   RING_WAIT		(1<<11)
>  #define   RING_WAIT_SEMAPHORE	(1<<10)
>  
> -#define __I915_PERF_RING(n) (4*n)
> -#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
> -#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
> -#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
> -
>  static int perf_i915_open(int config, int group)
>  {
>  	struct perf_event_attr attr;
> @@ -67,39 +62,53 @@ static int perf_i915_open(int config, int group)
>  
>  static int perf_init(struct gpu_top *gt)
>  {
> -	const char *names[] = {
> -		"RCS",
> -		"BCS",
> -		"VCS0",
> -		"VCS1",
> -		NULL,
> +	struct engine_desc {
> +		unsigned class, instance;
> +		const char *name;
> +	} *d, engines[] = {
> +		{ I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
> +		{ I915_ENGINE_CLASS_COPY, 0, "bcs0" },
> +		{ I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
> +		{ I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
> +		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
> +		{ 0, 0, NULL }
>  	};
> -	int n;
>  
> -	gt->fd = perf_i915_open(I915_PERF_RING_BUSY(0), -1);
> +	d = &engines[0];
> +
> +	gt->fd = perf_i915_open(I915_PMU_ENGINE_BUSY(d->class, d->instance),
> +				-1);
>  	if (gt->fd < 0)
>  		return -1;
>  
> -	if (perf_i915_open(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
> +	if (perf_i915_open(I915_PMU_ENGINE_WAIT(d->class, d->instance),
> +			   gt->fd) >= 0)
>  		gt->have_wait = 1;
>  
> -	if (perf_i915_open(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
> +	if (perf_i915_open(I915_PMU_ENGINE_SEMA(d->class, d->instance),
> +			   gt->fd) >= 0)
>  		gt->have_sema = 1;
>  
> -	gt->ring[0].name = names[0];
> +	gt->ring[0].name = d->name;
>  	gt->num_rings = 1;
>  
> -	for (n = 1; names[n]; n++) {
> -		if (perf_i915_open(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
> +	for (d++; d->name; d++) {
> +		if (perf_i915_open(I915_PMU_ENGINE_BUSY(d->class, d->instance),
> +				   gt->fd) >= 0) {
>  			if (gt->have_wait &&
> -			    perf_i915_open(I915_PERF_RING_WAIT(n), gt->fd) < 0)
> +			    perf_i915_open(I915_PMU_ENGINE_WAIT(d->class,
> +								d->instance),
> +					   gt->fd) < 0)
>  				return -1;
>  
>  			if (gt->have_sema &&
> -			    perf_i915_open(I915_PERF_RING_SEMA(n), gt->fd) < 0)
> +			    perf_i915_open(I915_PMU_ENGINE_SEMA(d->class,
> +								d->instance),
> +					   gt->fd) < 0)
>  				return -1;
>  
> -			gt->ring[gt->num_rings++].name = names[n];
> +			gt->ring[gt->num_rings++].name = d->name;
> +printf("%s busy = %x\n", d->name, I915_PMU_ENGINE_BUSY(d->class, d->instance));
>  		}
>  	}
>  
> diff --git a/overlay/perf.h b/overlay/perf.h
> index c44e65f9734c..2270f435fbc3 100644
> --- a/overlay/perf.h
> +++ b/overlay/perf.h
> @@ -3,41 +3,55 @@
>  
>  #include <linux/perf_event.h>
>  
> -#define I915_SAMPLE_BUSY	0
> -#define I915_SAMPLE_WAIT	1
> -#define I915_SAMPLE_SEMA	2
> +enum drm_i915_gem_engine_class {
> +	I915_ENGINE_CLASS_OTHER = 0,
> +	I915_ENGINE_CLASS_RENDER = 1,
> +	I915_ENGINE_CLASS_COPY = 2,
> +	I915_ENGINE_CLASS_VIDEO = 3,
> +	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
> +	I915_ENGINE_CLASS_MAX /* non-ABI */
> +};
>  
> -#define I915_SAMPLE_RCS		0
> -#define I915_SAMPLE_VCS		1
> -#define I915_SAMPLE_BCS		2
> -#define I915_SAMPLE_VECS	3
> +enum drm_i915_pmu_engine_sample {
> +	I915_SAMPLE_QUEUED = 0,
> +	I915_SAMPLE_BUSY = 1,
> +	I915_SAMPLE_WAIT = 2,
> +	I915_SAMPLE_SEMA = 3
> +};
>  
> -#define __I915_PERF_COUNT(ring, id) ((ring) << 4 | (id))
> +#define I915_PMU_SAMPLE_BITS (4)
> +#define I915_PMU_SAMPLE_MASK (0xf)
> +#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
> +#define I915_PMU_CLASS_SHIFT \
> +	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
>  
> -#define I915_PERF_COUNT_RCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_RCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_RCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
> +#define __I915_PMU_ENGINE(class, instance, sample) \
> +	((class) << I915_PMU_CLASS_SHIFT | \
> +	(instance) << I915_PMU_SAMPLE_BITS | \
> +	(sample))
>  
> -#define I915_PERF_COUNT_VCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_VCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_VCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
> +#define I915_PMU_ENGINE_QUEUED(class, instance) \
> +	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
>  
> -#define I915_PERF_COUNT_BCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_BCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_BCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
> +#define I915_PMU_ENGINE_BUSY(class, instance) \
> +	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
>  
> -#define I915_PERF_COUNT_VECS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_VECS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_VECS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
> +#define I915_PMU_ENGINE_WAIT(class, instance) \
> +	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
>  
> -#define I915_PERF_ACTUAL_FREQUENCY 32
> -#define I915_PERF_REQUESTED_FREQUENCY 33
> -#define I915_PERF_ENERGY 34
> -#define I915_PERF_INTERRUPTS 35
> +#define I915_PMU_ENGINE_SEMA(class, instance) \
> +	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
>  
> -#define I915_PERF_RC6_RESIDENCY		40
> -#define I915_PERF_RC6p_RESIDENCY	41
> -#define I915_PERF_RC6pp_RESIDENCY	42
> +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
> +
> +#define I915_PMU_ACTUAL_FREQUENCY 	__I915_PMU_OTHER(0)
> +#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
> +#define I915_PMU_ENERGY			__I915_PMU_OTHER(2)
> +#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(3)
> +
> +#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(4)
> +#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(5)
> +#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(6)
>  
>  static inline int
>  perf_event_open(struct perf_event_attr *attr,
> diff --git a/overlay/power.c b/overlay/power.c
> index 2f1521b82cd6..b84082e8cef0 100644
> --- a/overlay/power.c
> +++ b/overlay/power.c
> @@ -46,7 +46,7 @@ static int perf_open(void)
>  	attr.type = i915_type_id();
>  	if (attr.type == 0)
>  		return -1;
> -	attr.config = I915_PERF_ENERGY;
> +	attr.config = I915_PMU_ENERGY;
>  
>  	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
>  	return perf_event_open(&attr, -1, 0, -1, 0);
> diff --git a/overlay/rc6.c b/overlay/rc6.c
> index d7047c2f4880..4809bbd8b0b7 100644
> --- a/overlay/rc6.c
> +++ b/overlay/rc6.c
> @@ -60,15 +60,15 @@ static int perf_open(unsigned *flags)
>  {
>  	int fd;
>  
> -	fd = perf_i915_open(I915_PERF_RC6_RESIDENCY, -1);
> +	fd = perf_i915_open(I915_PMU_RC6_RESIDENCY, -1);
>  	if (fd < 0)
>  		return -1;
>  
>  	*flags |= RC6;
> -	if (perf_i915_open(I915_PERF_RC6p_RESIDENCY, fd) >= 0)
> +	if (perf_i915_open(I915_PMU_RC6p_RESIDENCY, fd) >= 0)
>  		*flags |= RC6p;
>  
> -	if (perf_i915_open(I915_PERF_RC6pp_RESIDENCY, fd) >= 0)
> +	if (perf_i915_open(I915_PMU_RC6pp_RESIDENCY, fd) >= 0)
>  		*flags |= RC6pp;
>  
>  	return fd;
> -- 
> 2.9.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list