[Intel-gfx] [PATCH 1/1] drm/i915/perf: Map OA buffer to user space

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Jul 16 15:32:10 UTC 2020


On 14/07/2020 10:22, Umesh Nerlige Ramappa wrote:
> From: Piotr Maciejewski <piotr.maciejewski at intel.com>
>
> i915 used to support time based sampling mode which is good for overall
> system monitoring, but is not enough for query mode used to measure a
> single draw call or dispatch. Gen9-Gen11 are using current i915 perf
> implementation for query, but Gen12+ requires a new approach based on
> triggered reports within oa buffer. In order to enable above feature
> two changes are required:
>
> 1. Whitelist update:
> - enable triggered reports within oa buffer
> - reading oa buffer head/tail/status information
> - reading gpu ticks counter.

I would break the patch into feature sets :

     - 1 patch to whitelist the trigger registers

     - 1 patch to whitelist OA counters & tail/head/... registers

     - 1 patch for mmap feature


Here are some IGT tests for the trigger feature : 
https://patchwork.freedesktop.org/series/75311/


We should verify that when not i915-perf is not active the whitelisted 
OA counters are not incrementing.

Otherwise we're starting to leak information that was not available before.


>
> 2. Map oa buffer at umd driver level to solve below constraints related
>     to time based sampling interface:
> - longer time to access reports collected by oa buffer
> - slow oa reports browsing since oa buffer size is large
> - missing oa report index, so query cannot browse report directly
> - with direct access to oa buffer, query can extract other useful
>    reports like context switch information needed to calculate correct
>    performance counters values.
>
> Signed-off-by: Piotr Maciejewski <piotr.maciejewski at intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_workarounds.c |  54 ++++++++
>   drivers/gpu/drm/i915/i915_perf.c            | 130 +++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_perf_types.h      |  13 ++
>   drivers/gpu/drm/i915/i915_reg.h             |  14 +++
>   include/uapi/drm/i915_drm.h                 |  19 +++
>   5 files changed, 227 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 5726cd0a37e0..cf89928fc3a5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1365,6 +1365,48 @@ whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
>   	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
>   }
>   
> +static void gen9_whitelist_build_performance_counters(struct i915_wa_list *w)
> +{
> +	/* OA buffer trigger report 2/6 used by performance query */
> +	whitelist_reg(w, OAREPORTTRIG2);
> +	whitelist_reg(w, OAREPORTTRIG6);
> +
> +	/* Performance counters A18-20 used by tbs marker query */
> +	whitelist_reg_ext(w, OA_PERF_COUNTER_A18,
> +			  RING_FORCE_TO_NONPRIV_ACCESS_RW |
> +			  RING_FORCE_TO_NONPRIV_RANGE_16);

I would actually whitelist the entire set of OA_PERF_COUNTER (0-36).

I would like to make use of them in Mesa for the Vulkan driver.

> +
> +	/* Read access to gpu ticks */
> +	whitelist_reg_ext(w, GEN8_GPU_TICKS,
> +			  RING_FORCE_TO_NONPRIV_ACCESS_RD);
> +
> +	/* Read access to: oa status, head, tail, buffer settings */
> +	whitelist_reg_ext(w, GEN8_OASTATUS,
> +			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
> +			  RING_FORCE_TO_NONPRIV_RANGE_4);
> +}
> +
> +static void gen12_whitelist_build_performance_counters(struct i915_wa_list *w)
> +{
> +	/* OA buffer trigger report 2/6 used by performance query */
> +	whitelist_reg(w, GEN12_OAG_OAREPORTTRIG2);
> +	whitelist_reg(w, GEN12_OAG_OAREPORTTRIG6);
> +
> +	/* Performance counters A18-20 used by tbs marker query */
> +	whitelist_reg_ext(w, GEN12_OAG_PERF_COUNTER_A18,
> +			  RING_FORCE_TO_NONPRIV_ACCESS_RW |
> +			  RING_FORCE_TO_NONPRIV_RANGE_16);
> +
> +	/* Read access to gpu ticks */
> +	whitelist_reg_ext(w, GEN12_OAG_GPU_TICKS,
> +			  RING_FORCE_TO_NONPRIV_ACCESS_RD);
> +
> +	/* Read access to: oa status, head, tail, buffer settings */
> +	whitelist_reg_ext(w, GEN12_OAG_OASTATUS,
> +			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
> +			  RING_FORCE_TO_NONPRIV_RANGE_4);
> +}
> +

> ...

>   
>   struct i915_perf {
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 86a23ced051b..2e3d264339e0 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -675,6 +675,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
>   #define  GEN7_OASTATUS2_MEM_SELECT_GGTT     (1 << 0) /* 0: PPGTT, 1: GGTT */
>   
> +#define GEN8_GPU_TICKS _MMIO(0x2910)
>   #define GEN8_OASTATUS _MMIO(0x2b08)
>   #define  GEN8_OASTATUS_OVERRUN_STATUS	    (1 << 3)
>   #define  GEN8_OASTATUS_COUNTER_OVERFLOW     (1 << 2)
> @@ -696,6 +697,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define OABUFFER_SIZE_16M   (7 << 3)
>   
>   #define GEN12_OA_TLB_INV_CR _MMIO(0xceec)
> +#define GEN12_SQCNT1 _MMIO(0x8718)
>   
>   /* Gen12 OAR unit */
>   #define GEN12_OAR_OACONTROL _MMIO(0x2960)
> @@ -731,6 +733,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define  GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS     (1 << 2)
>   #define  GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1)
>   
> +#define GEN12_OAG_GPU_TICKS _MMIO(0xda90)
>   #define GEN12_OAG_OASTATUS _MMIO(0xdafc)
>   #define  GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2)
>   #define  GEN12_OAG_OASTATUS_BUFFER_OVERFLOW  (1 << 1)
> @@ -972,6 +975,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT    24
>   #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT    28
>   
> +/* Performance counters registers */
> +#define OA_PERF_COUNTER_A18   _MMIO(0x2890)
> +#define OA_PERF_COUNTER_A19   _MMIO(0x2898)
> +#define OA_PERF_COUNTER_A20   _MMIO(0x28A0)
Maybe turn this into OA_PERF_COUNTER(idx) _MMIO(0x2800 + idx * 8)
> +
> +/* Gen12 Performance counters registers */
> +#define GEN12_OAG_PERF_COUNTER_A16   _MMIO(0xDA00)
> +#define GEN12_OAG_PERF_COUNTER_A18   _MMIO(0xDA10)
> +#define GEN12_OAG_PERF_COUNTER_A19   _MMIO(0xDA18)
> +#define GEN12_OAG_PERF_COUNTER_A20   _MMIO(0xDA20)
Same here
> +
>   /* Same layout as OASTARTTRIGX */
>   #define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900)
>   #define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 14b67cd6b54b..62b88c0123c8 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2048,6 +2048,25 @@ struct drm_i915_perf_open_param {z
>    */
>   #define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
>   
> +/**
> + * Returns OA buffer properties.
> + *
> + * This ioctl is available in perf revision 6.
> + */
> +#define I915_PERF_IOCTL_GET_OA_BUFFER_INFO _IO('i', 0x3)
> +
> +/**
> + * OA buffer information structure.
> + */
> +struct drm_i915_perf_oa_buffer_info {
> +	__u32 size;
> +	__u32 head;
> +	__u32 tail;
> +	__u32 gpu_address;
> +	__u64 cpu_address;
> +	__u64 reserved[4];
> +};
> +
>   /**
>    * Common to all i915 perf records
>    */

Thanks a lot,


-Lionel



More information about the Intel-gfx mailing list