[Intel-gfx] [PATCH v16 16/17] drm/i915/perf: notify sseu configuration changes
Matthew Auld
matthew.william.auld at gmail.com
Fri Jun 9 17:56:55 UTC 2017
On 5 June 2017 at 15:48, Lionel Landwerlin
<lionel.g.landwerlin at intel.com> wrote:
> This adds the ability for userspace to request that the kernel track &
> record sseu configuration changes. These changes are inserted into the
> perf stream so that userspace can interpret the OA reports using the
> configuration applied at the time the OA reports were generated.
>
> v2: Handle timestamps wrapping around 32bits (Lionel)
>
> v3: Use void * instead of u8 * for buffer virtual address (Chris)
> Use i915_vma_unpin_and_release (Chris)
> Fix a deadlock when SSEU buffer overflows (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 45 ++++
> drivers/gpu/drm/i915/i915_gem_context.c | 3 +
> drivers/gpu/drm/i915/i915_gem_context.h | 21 ++
> drivers/gpu/drm/i915/i915_perf.c | 454 ++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/intel_lrc.c | 7 +-
> include/uapi/drm/i915_drm.h | 35 +++
> 6 files changed, 535 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7ef1908e3a98..82f4e52718eb 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1970,6 +1970,12 @@ struct i915_perf_stream {
> bool noa_restore;
>
> /**
> + * @has_sseu: Whether the stream emits SSEU configuration changes
> + * reports.
> + */
> + bool has_sseu;
> +
> + /**
> * @enabled: Whether the stream is currently enabled, considering
> * whether the stream was opened in a disabled state and based
> * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
> @@ -2508,6 +2514,44 @@ struct drm_i915_private {
> */
> atomic_t noa_restore;
> } oa;
> +
> + struct {
> + /**
> + * Buffer containing change reports of the SSEU
> + * configuration.
> + */
> + struct i915_vma *vma;
> + void *vaddr;
> +
> + /**
> + * Scheduler write to the head, and the perf driver
> + * reads from tail.
> + */
> + u32 head;
> + u32 tail;
> +
> + /**
> + * Is the sseu buffer enabled.
> + */
> + bool enabled;
> +
> + /**
> + * Whether the buffer overflown.
> + */
> + bool overflow;
> +
> + /**
> + * Keeps track of how many times the OA unit has been
> + * enabled. This number is used to discard stale
> + * @perf_sseu in @i915_gem_context.
> + */
> + atomic64_t enable_no;
> +
> + /**
> + * Lock writes & tail pointer updates on this buffer.
> + */
> + spinlock_t ptr_lock;
> + } sseu_buffer;
> } perf;
>
> /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
> @@ -3558,6 +3602,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
> struct i915_gem_context *ctx,
> uint32_t *reg_state);
> int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
> +void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req);
>
> /* i915_gem_evict.c */
> int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 60d0bbf0ae2b..368bb527a75a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -510,6 +510,9 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> + /* Notify perf of possible change in SSEU configuration. */
> + i915_perf_emit_sseu_config(req);
> +
> /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
> if (INTEL_GEN(dev_priv) >= 7) {
> *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index e8247e2f6011..35ebdde1cd16 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -194,6 +194,27 @@ struct i915_gem_context {
>
> /** remap_slice: Bitmask of cache lines that need remapping */
> u8 remap_slice;
> +
> + /**
> + * @perf_sseu: last SSEU configuration notified to userspace
> + *
> + * SSEU configuration changes are notified to userspace through the
> + * perf infrastructure. We keep track here of the last notified
> + * configuration for a given context since configuration can change
> + * per engine.
> + */
> + struct sseu_dev_info perf_sseu;
> +
> + /**
> + * @perf_enable_no: last perf enable identifier
> + *
> + * Userspace can enable/disable the perf infrastructure whenever it
> + * wants without reconfiguring the OA unit. This number is updated at
> + * the same time as @perf_sseu by copying
> + * dev_priv->perf.oa.oa_sseu.enable_no which changes every time the
> + * user enables the OA unit.
> + */
> + u64 perf_enable_no;
> };
>
> static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3f49ce69641b..87ebc450c456 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -212,6 +212,15 @@
> */
> #define OA_BUFFER_SIZE SZ_16M
>
> +/* Dimension the SSEU buffer based on the size of the OA buffer to ensure we
> + * don't run out of space before the OA unit notifies us new data is
> + * available.
> + */
> +#define SSEU_BUFFER_ITEM_SIZE sizeof(struct perf_sseu_change)
> +#define SSEU_BUFFER_SIZE ((OA_BUFFER_SIZE * SSEU_BUFFER_ITEM_SIZE) / \
> + 256 /* Maximum report size */)
> +#define SSEU_BUFFER_NB_ITEM (SSEU_BUFFER_SIZE / SSEU_BUFFER_ITEM_SIZE)
> +
> #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
>
> /**
> @@ -352,6 +361,8 @@ struct perf_open_properties {
>
> bool noa_restore;
>
> + bool sseu_enable;
> +
> /* OA sampling state */
> int metrics_set;
> int oa_format;
> @@ -359,6 +370,19 @@ struct perf_open_properties {
> int oa_period_exponent;
> };
>
> +struct perf_sseu_change {
> + u32 timestamp;
> + struct drm_i915_perf_sseu_change change;
> +};
> +
> +static void i915_oa_stream_disable(struct i915_perf_stream *stream);
> +static void i915_oa_stream_enable(struct i915_perf_stream *stream);
> +
> +static bool timestamp_passed(u32 t0, u32 t1)
> +{
> + return (s32)(t0 - t1) >= 0;
Formatting seems off; also, since the `>=` makes this return true when t0 == t1, shouldn't it be named timestamp_passed_or_equal?
> +}
> +
> static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
> {
> return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
> @@ -571,6 +595,87 @@ static int append_oa_sample(struct i915_perf_stream *stream,
> return 0;
> }
>
> +static int append_sseu_change(struct i915_perf_stream *stream,
> + char __user *buf,
> + size_t count,
> + size_t *offset,
> + const struct perf_sseu_change *sseu_report)
> +{
> + struct drm_i915_perf_record_header header;
> +
> + header.type = DRM_I915_PERF_RECORD_SSEU_CHANGE;
> + header.pad = 0;
> + header.size = sizeof(struct drm_i915_perf_record_header) +
> + sizeof(struct drm_i915_perf_sseu_change);
> +
> + if ((count - *offset) < header.size)
> + return -ENOSPC;
> +
> + buf += *offset;
> + if (copy_to_user(buf, &header, sizeof(header)))
> + return -EFAULT;
> + buf += sizeof(header);
> +
> + if (copy_to_user(buf, &sseu_report->change,
> + sizeof(struct drm_i915_perf_sseu_change)))
> + return -EFAULT;
> +
> + (*offset) += header.size;
> +
> + return 0;
> +}
> +
> +static struct perf_sseu_change *
> +sseu_buffer_peek(struct drm_i915_private *dev_priv, u32 head, u32 tail)
> +{
> + if (head == tail)
> + return NULL;
> +
> + return dev_priv->perf.sseu_buffer.vaddr + tail * SSEU_BUFFER_ITEM_SIZE;
> +}
> +
> +static struct perf_sseu_change *
> +sseu_buffer_advance(struct drm_i915_private *dev_priv, u32 head, u32 *tail)
> +{
> + *tail = (*tail + 1) % SSEU_BUFFER_NB_ITEM;
> +
> + return sseu_buffer_peek(dev_priv, head, *tail);
> +}
> +
> +static void
> +sseu_buffer_read_pointers(struct i915_perf_stream *stream, u32 *head, u32 *tail)
> +{
> + struct drm_i915_private *dev_priv = stream->dev_priv;
> +
> + if (!stream->has_sseu) {
> + *head = 0;
> + *tail = 0;
> + return;
> + }
> +
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + *head = dev_priv->perf.sseu_buffer.head;
> + *tail = dev_priv->perf.sseu_buffer.tail;
> +
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +static void
> +sseu_buffer_write_tail_pointer(struct i915_perf_stream *stream, u32 tail)
> +{
> + struct drm_i915_private *dev_priv = stream->dev_priv;
> +
> + if (!stream->has_sseu)
> + return;
> +
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + dev_priv->perf.sseu_buffer.tail = tail;
> +
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> /**
> * Copies all buffered OA reports into userspace read() buffer.
> * @stream: An i915-perf stream opened for OA metrics
> @@ -606,6 +711,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> unsigned int aged_tail_idx;
> u32 head, tail;
> u32 taken;
> + struct perf_sseu_change *sseu_report;
> + u32 sseu_head, sseu_tail;
> int ret = 0;
>
> if (WARN_ON(!stream->enabled))
> @@ -646,6 +753,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> head, tail))
> return -EIO;
>
> + /* Extract first SSEU report. */
> + sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail);
> + sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail);
>
> for (/* none */;
> (taken = OA_TAKEN(tail, head));
> @@ -686,6 +796,26 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> continue;
> }
>
> + while (sseu_report &&
> + !timestamp_passed(sseu_report->timestamp, report32[1])) {
> + /*
> + * While filtering for a single context we avoid
> + * leaking the IDs of other contexts.
> + */
> + if (stream->ctx &&
> + stream->ctx->hw_id != sseu_report->change.hw_id) {
> + sseu_report->change.hw_id = INVALID_CTX_ID;
> + }
> +
> + ret = append_sseu_change(stream, buf, count,
> + offset, sseu_report);
> + if (ret)
> + break;
> +
> + sseu_report = sseu_buffer_advance(dev_priv, sseu_head,
> + &sseu_tail);
> + }
> +
> /*
> * XXX: Just keep the lower 21 bits for now since I'm not
> * entirely sure if the HW touches any of the higher bits in
> @@ -783,6 +913,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
> }
>
> + /* Update tail pointer of the sseu buffer. */
> + sseu_buffer_write_tail_pointer(stream, sseu_tail);
> +
> return ret;
> }
>
> @@ -812,12 +945,29 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
> size_t *offset)
> {
> struct drm_i915_private *dev_priv = stream->dev_priv;
> + bool overflow = false;
> u32 oastatus;
> int ret;
>
> if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
> return -EIO;
>
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> + overflow = dev_priv->perf.sseu_buffer.overflow;
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + if (overflow) {
> + DRM_DEBUG("SSEU buffer overflow\n");
> +
> + ret = append_oa_status(stream, buf, count, offset,
> + DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> + if (ret)
> + return ret;
> +
> + i915_oa_stream_disable(stream);
> + i915_oa_stream_enable(stream);
> + }
> +
> oastatus = I915_READ(GEN8_OASTATUS);
>
> /*
> @@ -835,16 +985,18 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
> * that something has gone quite badly wrong.
> */
> if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
> - ret = append_oa_status(stream, buf, count, offset,
> - DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> - if (ret)
> - return ret;
> -
> DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
> dev_priv->perf.oa.period_exponent);
>
> - dev_priv->perf.oa.ops.oa_disable(dev_priv);
> - dev_priv->perf.oa.ops.oa_enable(dev_priv);
> + if (!overflow) {
> + ret = append_oa_status(stream, buf, count, offset,
> + DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> + if (ret)
> + return ret;
> +
> + i915_oa_stream_disable(stream);
> + i915_oa_stream_enable(stream);
> + }
>
> /*
> * Note: .oa_enable() is expected to re-init the oabuffer and
> @@ -854,10 +1006,12 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
> }
>
> if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
> - ret = append_oa_status(stream, buf, count, offset,
> - DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> - if (ret)
> - return ret;
> + if (!overflow) {
> + ret = append_oa_status(stream, buf, count, offset,
> + DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> + if (ret)
> + return ret;
> + }
> I915_WRITE(GEN8_OASTATUS,
> oastatus & ~GEN8_OASTATUS_REPORT_LOST);
> }
> @@ -900,6 +1054,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> unsigned int aged_tail_idx;
> u32 head, tail;
> u32 taken;
> + struct perf_sseu_change *sseu_report;
> + u32 sseu_head, sseu_tail;
> int ret = 0;
>
> if (WARN_ON(!stream->enabled))
> @@ -937,6 +1093,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> head, tail))
> return -EIO;
>
> + /* Extract first SSEU report. */
> + sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail);
> + sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail);
>
> for (/* none */;
> (taken = OA_TAKEN(tail, head));
> @@ -969,6 +1128,27 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> continue;
> }
>
> + /* Emit any potential sseu report. */
> + while (sseu_report &&
> + !timestamp_passed(sseu_report->timestamp, report32[1])) {
> + /*
> + * While filtering for a single context we avoid
> + * leaking the IDs of other contexts.
> + */
> + if (stream->ctx &&
> + stream->ctx->hw_id != sseu_report->change.hw_id) {
> + sseu_report->change.hw_id = INVALID_CTX_ID;
> + }
> +
> + ret = append_sseu_change(stream, buf, count,
> + offset, sseu_report);
> + if (ret)
> + break;
> +
> + sseu_report = sseu_buffer_advance(dev_priv, sseu_head,
> + &sseu_tail);
> + }
> +
> ret = append_oa_sample(stream, buf, count, offset, report);
> if (ret)
> break;
> @@ -998,6 +1178,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
> }
>
> + /* Update tail pointer of the sseu buffer. */
> + sseu_buffer_write_tail_pointer(stream, sseu_tail);
> +
> return ret;
> }
>
> @@ -1023,12 +1206,29 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
> size_t *offset)
> {
> struct drm_i915_private *dev_priv = stream->dev_priv;
> + bool overflow = false;
> u32 oastatus1;
> int ret;
>
> if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
> return -EIO;
>
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> + overflow = dev_priv->perf.sseu_buffer.overflow;
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + if (overflow) {
> + DRM_DEBUG("SSEU buffer overflow\n");
> +
> + ret = append_oa_status(stream, buf, count, offset,
> + DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> + if (ret)
> + return ret;
> +
> + i915_oa_stream_disable(stream);
> + i915_oa_stream_enable(stream);
> + }
> +
> oastatus1 = I915_READ(GEN7_OASTATUS1);
>
> /* XXX: On Haswell we don't have a safe way to clear oastatus1
> @@ -1059,25 +1259,30 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
> * now.
> */
> if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
> - ret = append_oa_status(stream, buf, count, offset,
> - DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> - if (ret)
> - return ret;
> -
> DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
> dev_priv->perf.oa.period_exponent);
>
> - dev_priv->perf.oa.ops.oa_disable(dev_priv);
> - dev_priv->perf.oa.ops.oa_enable(dev_priv);
> + if (!overflow) {
> + ret = append_oa_status(stream, buf, count, offset,
> + DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> + if (ret)
> + return ret;
> +
> + i915_oa_stream_disable(stream);
> + i915_oa_stream_enable(stream);
> + }
>
> oastatus1 = I915_READ(GEN7_OASTATUS1);
> }
>
> if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
> - ret = append_oa_status(stream, buf, count, offset,
> - DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> - if (ret)
> - return ret;
> + if (!overflow) {
> + ret = append_oa_status(stream, buf, count, offset,
> + DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> + if (ret)
> + return ret;
> + }
> +
> dev_priv->perf.oa.gen7_latched_oastatus1 |=
> GEN7_OASTATUS1_REPORT_LOST;
> }
> @@ -1227,18 +1432,21 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
> }
>
> static void
> -free_oa_buffer(struct drm_i915_private *i915)
> +free_sseu_buffer(struct drm_i915_private *i915)
> {
> - mutex_lock(&i915->drm.struct_mutex);
> + i915_gem_object_unpin_map(i915->perf.sseu_buffer.vma->obj);
> + i915_vma_unpin_and_release(&i915->perf.sseu_buffer.vma);
>
> + i915->perf.sseu_buffer.vaddr = NULL;
> +}
> +
> +static void
> +free_oa_buffer(struct drm_i915_private *i915)
> +{
> i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
> - i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
> - i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
> + i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma);
>
> - i915->perf.oa.oa_buffer.vma = NULL;
> i915->perf.oa.oa_buffer.vaddr = NULL;
> -
> - mutex_unlock(&i915->drm.struct_mutex);
> }
>
> static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> @@ -1255,8 +1463,15 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>
> dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
>
> + mutex_lock(&dev_priv->drm.struct_mutex);
> +
> + if (stream->has_sseu)
> + free_sseu_buffer(dev_priv);
> +
> free_oa_buffer(dev_priv);
>
> + mutex_unlock(&dev_priv->drm.struct_mutex);
> +
> intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> intel_runtime_pm_put(dev_priv);
>
> @@ -1269,6 +1484,97 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> }
> }
>
> +static void init_sseu_buffer(struct drm_i915_private *dev_priv,
> + bool need_lock)
> +{
> + /* Cleanup auxilliary buffer. */
> + memset(dev_priv->perf.sseu_buffer.vaddr, 0, SSEU_BUFFER_SIZE);
> +
> + atomic64_inc(&dev_priv->perf.sseu_buffer.enable_no);
> +
> + if (need_lock)
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + /* Initialize head & tail pointers. */
> + dev_priv->perf.sseu_buffer.head = 0;
> + dev_priv->perf.sseu_buffer.tail = 0;
> + dev_priv->perf.sseu_buffer.overflow = false;
> +
> + if (need_lock)
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +static int alloc_sseu_buffer(struct drm_i915_private *dev_priv)
> +{
> + struct drm_i915_gem_object *bo;
> + struct i915_vma *vma;
> + int ret = 0;
> +
> + bo = i915_gem_object_create(dev_priv, SSEU_BUFFER_SIZE);
> + if (IS_ERR(bo)) {
> + DRM_ERROR("Failed to allocate SSEU buffer\n");
> + ret = PTR_ERR(bo);
> + goto out;
> + }
> +
> + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SSEU_BUFFER_SIZE, 0);
> + if (IS_ERR(vma)) {
> + ret = PTR_ERR(vma);
> + goto err_unref;
> + }
> + dev_priv->perf.sseu_buffer.vma = vma;
> +
> + dev_priv->perf.sseu_buffer.vaddr =
> + i915_gem_object_pin_map(bo, I915_MAP_WB);
> + if (IS_ERR(dev_priv->perf.sseu_buffer.vaddr)) {
> + ret = PTR_ERR(dev_priv->perf.sseu_buffer.vaddr);
> + goto err_unpin;
> + }
> +
> + atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0);
> +
> + init_sseu_buffer(dev_priv, true);
> +
> + DRM_DEBUG_DRIVER("OA SSEU Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
> + i915_ggtt_offset(dev_priv->perf.sseu_buffer.vma),
> + dev_priv->perf.sseu_buffer.vaddr);
> +
> + goto out;
> +
> +err_unpin:
> + __i915_vma_unpin(vma);
> +
> +err_unref:
> + i915_gem_object_put(bo);
> +
> + dev_priv->perf.sseu_buffer.vaddr = NULL;
> + dev_priv->perf.sseu_buffer.vma = NULL;
> +
> +out:
> + return ret;
> +}
> +
> +static void sseu_enable(struct drm_i915_private *dev_priv)
> +{
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + init_sseu_buffer(dev_priv, false);
> +
> + dev_priv->perf.sseu_buffer.enabled = true;
> +
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +static void sseu_disable(struct drm_i915_private *dev_priv)
> +{
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + dev_priv->perf.sseu_buffer.enabled = false;
> + dev_priv->perf.sseu_buffer.overflow = false;
> +
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
> {
> u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
> @@ -1763,6 +2069,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> struct intel_context *ce = &ctx->engine[RCS];
> u32 *regs;
>
> + memset(&ctx->perf_sseu, 0, sizeof(ctx->perf_sseu));
> + ctx->perf_enable_no = 0;
> +
> /* OA settings will be set upon first use */
> if (!ce->state)
> continue;
> @@ -1927,6 +2236,9 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
> {
> struct drm_i915_private *dev_priv = stream->dev_priv;
>
> + if (stream->has_sseu)
> + sseu_enable(dev_priv);
> +
> dev_priv->perf.oa.ops.oa_enable(dev_priv);
>
> if (dev_priv->perf.oa.periodic)
> @@ -1957,6 +2269,9 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
> {
> struct drm_i915_private *dev_priv = stream->dev_priv;
>
> + if (stream->has_sseu)
> + sseu_disable(dev_priv);
> +
> dev_priv->perf.oa.ops.oa_disable(dev_priv);
>
> if (dev_priv->perf.oa.periodic)
> @@ -2087,6 +2402,14 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> atomic_inc(&dev_priv->perf.oa.noa_restore);
> }
>
> + if (props->sseu_enable) {
> + ret = alloc_sseu_buffer(dev_priv);
> + if (ret)
> + goto err_sseu_buf_alloc;
> +
> + stream->has_sseu = true;
> + }
> +
> ret = alloc_oa_buffer(dev_priv);
> if (ret)
> goto err_oa_buf_alloc;
> @@ -2119,6 +2442,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> err_enable:
> intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> intel_runtime_pm_put(dev_priv);
> + free_sseu_buffer(dev_priv);
> +
> +err_sseu_buf_alloc:
> free_oa_buffer(dev_priv);
>
> err_oa_buf_alloc:
This looks bogus, it needs to be:
err_enable:
free_oa_buffer()
err_oa_buf_alloc:
if (stream->has_sseu)
free_sseu_buffer()
err_sseu_buf_alloc:
> @@ -2212,6 +2538,69 @@ int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
> }
>
> /**
> + * i915_perf_emit_sseu_config - emit the SSEU configuration of a gem request
> + * @req: the request
> + *
> + * If the OA unit is enabled, the write the SSEU configuration of a
> + * given request into a circular buffer. The buffer is read at the
> + * same time as the OA buffer and its elements are inserted into the
> + * perf stream to notify the userspace of SSEU configuration changes.
> + * This is needed to interpret values from the OA reports.
> + */
> +void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req)
> +{
> + struct drm_i915_private *dev_priv = req->i915;
> + struct perf_sseu_change *report;
> + const struct sseu_dev_info *sseu;
> + u32 head, timestamp;
> + u64 perf_enable_no;
> +
> + /* Perf not supported. */
> + if (!dev_priv->perf.initialized)
> + return;
> +
> + /* Nothing's changed, no need to signal userspace. */
> + sseu = &req->ctx->engine[req->engine->id].sseu;
> + perf_enable_no = atomic64_read(&dev_priv->perf.sseu_buffer.enable_no);
> + if (perf_enable_no == req->ctx->perf_enable_no &&
> + memcmp(&req->ctx->perf_sseu, sseu, sizeof(*sseu)) == 0)
> + return;
> +
> + memcpy(&req->ctx->perf_sseu, sseu, sizeof(*sseu));
> + req->ctx->perf_enable_no = perf_enable_no;
> +
> + /* Read the timestamp outside the spin lock. */
> + timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE));
Except we then proceed to read it again inside the spinlock :)
> +
> + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> + if (!dev_priv->perf.sseu_buffer.enabled)
> + goto out;
> +
> + head = dev_priv->perf.sseu_buffer.head;
> + dev_priv->perf.sseu_buffer.head =
> + (dev_priv->perf.sseu_buffer.head + 1) % SSEU_BUFFER_NB_ITEM;
> +
> + /* This should never happen, unless we've wrongly dimensioned the SSEU
> + * buffer. */
> + if (dev_priv->perf.sseu_buffer.head == dev_priv->perf.sseu_buffer.tail) {
> + dev_priv->perf.sseu_buffer.overflow = true;
> + goto out;
> + }
What is the intention here when we do overflow? We bail out the first
time, but if we re-enter this function while still in an overflow state,
we will continue writing out reports anyway.
> +
> + report = dev_priv->perf.sseu_buffer.vaddr + head * SSEU_BUFFER_ITEM_SIZE;
> + memset(report, 0, sizeof(*report));
> +
> + report->timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE));
> + report->change.hw_id = req->ctx->hw_id;
> + report->change.slice_mask = sseu->slice_mask;
> + report->change.subslice_mask = sseu->subslice_mask;
> +
> +out:
> + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +/**
> * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
> * @stream: An i915 perf stream
> * @file: An i915 perf stream file
> @@ -2867,6 +3256,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
> case DRM_I915_PERF_PROP_NOA_RESTORE:
> props->noa_restore = true;
> break;
> + case DRM_I915_PERF_PROP_SSEU_CHANGE:
> + props->sseu_enable = true;
> + break;
> case DRM_I915_PERF_PROP_MAX:
> MISSING_CASE(id);
> return -EINVAL;
> @@ -3229,6 +3621,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>
> atomic_set(&dev_priv->perf.oa.noa_restore, 0);
>
> + spin_lock_init(&dev_priv->perf.sseu_buffer.ptr_lock);
> + dev_priv->perf.sseu_buffer.enabled = false;
> + atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0);
> +
> oa_sample_rate_hard_limit =
> dev_priv->perf.oa.timestamp_frequency / 2;
> dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ad37e2b236b0..e2f83a12337b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -350,8 +350,10 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
> rq = port_unpack(&port[n], &count);
> if (rq) {
> GEM_BUG_ON(count > !n);
> - if (!count++)
> + if (!count++) {
> + i915_perf_emit_sseu_config(rq);
> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> + }
> port_set(&port[n], port_pack(rq, count));
> desc = execlists_update_context(rq);
> GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
> @@ -1406,6 +1408,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
> req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
> }
>
> + /* Emit NOA config */
> + i915_oa_emit_noa_config_locked(req);
> +
> cs = intel_ring_begin(req, 4);
> if (IS_ERR(cs))
> return PTR_ERR(cs);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 27cc72a85d07..c82f7473e3a0 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -447,6 +447,16 @@ typedef struct drm_i915_setparam {
> int value;
> } drm_i915_setparam_t;
>
> +union drm_i915_gem_param_sseu {
> + struct {
> + __u8 slice_mask;
> + __u8 subslice_mask;
> + __u8 min_eu_per_subslice;
> + __u8 max_eu_per_subslice;
> + } packed;
> + __u64 value;
> +};
Please kill?
> +
> /* A memory manager for regions of shared memory:
> */
> #define I915_MEM_REGION_AGP 1
> @@ -1376,6 +1386,13 @@ enum drm_i915_perf_property_id {
> */
> DRM_I915_PERF_PROP_NOA_RESTORE,
>
> + /**
> + * Specifying this property will lead the perf driver to insert sseu
> + * change reports (see @DRM_I915_PERF_RECORD_SSEU_CHANGE) in the
> + * stream of reports.
> + */
> + DRM_I915_PERF_PROP_SSEU_CHANGE,
> +
> DRM_I915_PERF_PROP_MAX /* non-ABI */
> };
>
> @@ -1459,9 +1476,27 @@ enum drm_i915_perf_record_type {
> */
> DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
>
> + /**
> + * A change in the sseu configuration happened for a particular
> + * context.
> + *
> + * struct {
> + * struct drm_i915_perf_record_header header;
> + * { struct drm_i915_perf_sseu_change change; } && DRM_I915_PERF_PROP_SSEU_CHANGE
> + * };
> + */
> + DRM_I915_PERF_RECORD_SSEU_CHANGE = 4,
> +
> DRM_I915_PERF_RECORD_MAX /* non-ABI */
> };
>
> +struct drm_i915_perf_sseu_change {
> + __u32 hw_id;
> + __u16 pad;
> + __u8 slice_mask;
> + __u8 subslice_mask;
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
> --
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list