[Intel-gfx] [PATCH 33/39] drm/i915/perf: add a parameter to control the size of OA buffer

Lionel Landwerlin lionel.g.landwerlin at intel.com
Wed Aug 21 08:37:04 UTC 2019


I think this patch needs to be landed after 
https://patchwork.freedesktop.org/patch/319815/?series=60916&rev=10
As recommended by Joonas.

It should also bump the revision number of the perf API so that the 
application can detect this feature is available.

Thanks,

-Lionel

On 16/08/2019 10:04, Lucas De Marchi wrote:
> From: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>
> The way our hardware is designed doesn't seem to let us use the
> MI_RECORD_PERF_COUNT command without setting up a circular buffer.
>
> In the case where the user didn't request OA reports to be available
> through the i915 perf stream, we can set the OA buffer to the minimum
> size to avoid consuming memory which won't be used by the driver.
>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
>   drivers/gpu/drm/i915/i915_reg.h  |  2 +
>   include/uapi/drm/i915_drm.h      |  7 +++
>   3 files changed, 74 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 2c9f46e12622..9386d9c82930 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -216,13 +216,7 @@
>   #include "oa/i915_oa_cnl.h"
>   #include "oa/i915_oa_icl.h"
>   
> -/* HW requires this to be a power of two, between 128k and 16M, though driver
> - * is currently generally designed assuming the largest 16M size is used such
> - * that the overflow cases are unlikely in normal operation.
> - */
> -#define OA_BUFFER_SIZE		SZ_16M
> -
> -#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
> +#define OA_TAKEN(tail, head)	(((tail) - (head)) & (stream->oa_buffer.vma->size - 1))
>   
>   /**
>    * DOC: OA Tail Pointer Race
> @@ -347,6 +341,7 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>    * @oa_format: An OA unit HW report format
>    * @oa_periodic: Whether to enable periodic OA unit sampling
>    * @oa_period_exponent: The OA unit sampling period is derived from this
> + * @oa_buffer_size_exponent: The OA buffer size is derived from this
>    *
>    * As read_properties_unlocked() enumerates and validates the properties given
>    * to open a stream of metrics the configuration is built up in the structure
> @@ -363,6 +358,7 @@ struct perf_open_properties {
>   	int oa_format;
>   	bool oa_periodic;
>   	int oa_period_exponent;
> +	u32 oa_buffer_size_exponent;
>   };
>   
>   static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
> @@ -531,7 +527,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>   		 * could put the tail out of bounds...
>   		 */
>   		if (hw_tail >= gtt_offset &&
> -		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
> +		    hw_tail < (gtt_offset + stream->oa_buffer.vma->size)) {
>   			stream->oa_buffer.tails[!aged_idx].offset =
>   				aging_tail = hw_tail;
>   			stream->oa_buffer.aging_timestamp = now;
> @@ -659,7 +655,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   	int report_size = stream->oa_buffer.format_size;
>   	u8 *oa_buf_base = stream->oa_buffer.vaddr;
>   	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> -	u32 mask = (OA_BUFFER_SIZE - 1);
> +	u32 mask = (stream->oa_buffer.vma->size - 1);
>   	size_t start_offset = *offset;
>   	unsigned long flags;
>   	unsigned int aged_tail_idx;
> @@ -699,8 +695,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   	 * only be incremented by multiples of the report size (notably also
>   	 * all a power of two).
>   	 */
> -	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
> -		      tail > OA_BUFFER_SIZE || tail % report_size,
> +	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
> +		      tail > stream->oa_buffer.vma->size || tail % report_size,
>   		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>   		      head, tail))
>   		return -EIO;
> @@ -723,7 +719,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   		 * here would imply a driver bug that would result
>   		 * in an overrun.
>   		 */
> -		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
> +		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
>   			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
>   			break;
>   		}
> @@ -881,11 +877,6 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
>   	 * automatically triggered reports in this condition and so we
>   	 * have to assume that old reports are now being trampled
>   	 * over.
> -	 *
> -	 * Considering how we don't currently give userspace control
> -	 * over the OA buffer size and always configure a large 16MB
> -	 * buffer, then a buffer overflow does anyway likely indicate
> -	 * that something has gone quite badly wrong.
>   	 */
>   	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
>   		ret = append_oa_status(stream, buf, count, offset,
> @@ -947,7 +938,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   	int report_size = stream->oa_buffer.format_size;
>   	u8 *oa_buf_base = stream->oa_buffer.vaddr;
>   	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> -	u32 mask = (OA_BUFFER_SIZE - 1);
> +	u32 mask = (stream->oa_buffer.vma->size - 1);
>   	size_t start_offset = *offset;
>   	unsigned long flags;
>   	unsigned int aged_tail_idx;
> @@ -984,8 +975,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   	 * only be incremented by multiples of the report size (notably also
>   	 * all a power of two).
>   	 */
> -	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
> -		      tail > OA_BUFFER_SIZE || tail % report_size,
> +	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
> +		      tail > stream->oa_buffer.vma->size || tail % report_size,
>   		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>   		      head, tail))
>   		return -EIO;
> @@ -1005,7 +996,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   		 * here would imply a driver bug that would result
>   		 * in an overrun.
>   		 */
> -		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
> +		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
>   			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
>   			break;
>   		}
> @@ -1408,7 +1399,9 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
>   
>   	I915_WRITE(GEN7_OABUFFER, gtt_offset);
>   
> -	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
> +	I915_WRITE(GEN7_OASTATUS1, gtt_offset |
> +		   ((stream->oa_buffer.size_exponent - 17) <<
> +		    GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */
>   
>   	/* Mark that we need updated tail pointers to read from... */
>   	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> @@ -1433,7 +1426,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
>   	 * the assumption that new reports are being written to zeroed
>   	 * memory...
>   	 */
> -	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
> +	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
>   
>   	/* Maybe make ->pollin per-stream state if we support multiple
>   	 * concurrent streams in the future.
> @@ -1464,7 +1457,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	 *  bit."
>   	 */
>   	I915_WRITE(GEN8_OABUFFER, gtt_offset |
> -		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
> +		   ((stream->oa_buffer.size_exponent - 17) <<
> +		    GEN8_OABUFFER_BUFFER_SIZE_SHIFT) |
> +		   GEN8_OABUFFER_MEM_SELECT_GGTT);
>   	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
>   
>   	/* Mark that we need updated tail pointers to read from... */
> @@ -1492,7 +1487,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	 * the assumption that new reports are being written to zeroed
>   	 * memory...
>   	 */
> -	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
> +	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
>   
>   	/*
>   	 * Maybe make ->pollin per-stream state if we support multiple
> @@ -1501,24 +1496,25 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	stream->pollin = false;
>   }
>   
> -static int alloc_oa_buffer(struct i915_perf_stream *stream)
> +static int alloc_oa_buffer(struct i915_perf_stream *stream, int size_exponent)
>   {
>   	struct drm_i915_gem_object *bo;
>   	struct drm_i915_private *dev_priv = stream->dev_priv;
>   	struct i915_vma *vma;
> +	size_t size = 1U << size_exponent;
>   	int ret;
>   
>   	if (WARN_ON(stream->oa_buffer.vma))
>   		return -ENODEV;
>   
> +	if (WARN_ON(size < SZ_128K || size > SZ_16M))
> +		return -EINVAL;
> +
>   	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
>   	if (ret)
>   		return ret;
>   
> -	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
> -	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
> -
> -	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
> +	bo = i915_gem_object_create_shmem(dev_priv, size);
>   	if (IS_ERR(bo)) {
>   		DRM_ERROR("Failed to allocate OA buffer\n");
>   		ret = PTR_ERR(bo);
> @@ -1534,6 +1530,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   		goto err_unref;
>   	}
>   	stream->oa_buffer.vma = vma;
> +	stream->oa_buffer.size_exponent = size_exponent;
>   
>   	stream->oa_buffer.vaddr =
>   		i915_gem_object_pin_map(bo, I915_MAP_WB);
> @@ -1542,9 +1539,10 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   		goto err_unpin;
>   	}
>   
> -	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
> +	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n",
>   			 i915_ggtt_offset(stream->oa_buffer.vma),
> -			 stream->oa_buffer.vaddr);
> +			 stream->oa_buffer.vaddr,
> +			 stream->oa_buffer.vma->size);
>   
>   	goto unlock;
>   
> @@ -2251,7 +2249,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>   	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
>   	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
>   
> -	ret = alloc_oa_buffer(stream);
> +	ret = alloc_oa_buffer(stream, props->oa_buffer_size_exponent);
>   	if (ret)
>   		goto err_oa_buf_alloc;
>   
> @@ -2823,6 +2821,26 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
>   			 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
>   }
>   
> +static int
> +select_oa_buffer_exponent(struct drm_i915_private *i915,
> +			  u64 requested_size)
> +{
> +	int order;
> +
> +	/*
> +	 * When no size is specified, use the largest size supported by all
> +	 * generations.
> +	 */
> +	if (!requested_size)
> +		return order_base_2(SZ_16M);
> +
> +	order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M));
> +	if (requested_size != (1UL << order))
> +		return -EINVAL;
> +
> +	return order;
> +}
> +
>   /**
>    * read_properties_unlocked - validate + copy userspace stream open properties
>    * @dev_priv: i915 device instance
> @@ -2950,6 +2968,14 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>   			props->oa_periodic = true;
>   			props->oa_period_exponent = value;
>   			break;
> +		case DRM_I915_PERF_PROP_OA_BUFFER_SIZE:
> +			ret = select_oa_buffer_exponent(dev_priv, value);
> +			if (ret < 0) {
> +				DRM_DEBUG("OA buffer size invalid %llu\n", value);
> +				return ret;
> +			}
> +			props->oa_buffer_size_exponent = ret;
> +			break;
>   		case DRM_I915_PERF_PROP_MAX:
>   			MISSING_CASE(id);
>   			return -EINVAL;
> @@ -2958,6 +2984,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>   		uprop += 2;
>   	}
>   
> +	/* If no buffer size was requested, select the default one. */
> +	if (!props->oa_buffer_size_exponent) {
> +		props->oa_buffer_size_exponent =
> +			select_oa_buffer_exponent(dev_priv, 0);
> +	}
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 34d83e3a51a7..d35f385f8288 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -675,12 +675,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define GEN8_OABUFFER_UDW _MMIO(0x23b4)
>   #define GEN8_OABUFFER _MMIO(0x2b14)
>   #define  GEN8_OABUFFER_MEM_SELECT_GGTT      (1 << 0)  /* 0: PPGTT, 1: GGTT */
> +#define  GEN8_OABUFFER_BUFFER_SIZE_SHIFT    3
>   
>   #define GEN7_OASTATUS1 _MMIO(0x2364)
>   #define  GEN7_OASTATUS1_TAIL_MASK	    0xffffffc0
>   #define  GEN7_OASTATUS1_COUNTER_OVERFLOW    (1 << 2)
>   #define  GEN7_OASTATUS1_OABUFFER_OVERFLOW   (1 << 1)
>   #define  GEN7_OASTATUS1_REPORT_LOST	    (1 << 0)
> +#define  GEN7_OASTATUS1_BUFFER_SIZE_SHIFT   3
>   
>   #define GEN7_OASTATUS2 _MMIO(0x2368)
>   #define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..f662c534de0a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1873,6 +1873,13 @@ enum drm_i915_perf_property_id {
>   	 */
>   	DRM_I915_PERF_PROP_OA_EXPONENT,
>   
> +	/**
> +	 * Specify a global OA buffer size to be allocated in bytes. The size
> +	 * specified must be supported by HW (currently supported sizes are
> +	 * powers of 2 ranging from 128Kb to 16Mb).
> +	 */
> +	DRM_I915_PERF_PROP_OA_BUFFER_SIZE,
> +
>   	DRM_I915_PERF_PROP_MAX /* non-ABI */
>   };
>   




More information about the Intel-gfx mailing list