[Intel-gfx] [PATCH v7 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit
Chris Wilson
chris at chris-wilson.co.uk
Tue Oct 25 23:05:44 UTC 2016
On Tue, Oct 25, 2016 at 12:19:29AM +0100, Robert Bragg wrote:
> +static int claim_specific_ctx(struct i915_perf_stream *stream)
> +{
> + struct drm_i915_private *dev_priv = stream->dev_priv;
> + struct i915_vma *vma;
> + int ret;
> +
> + ret = i915_mutex_lock_interruptible(&dev_priv->drm);
Looking forward to the day these don't need struct_mutex.
> + if (ret)
> + return ret;
> +
> + /* So that we don't have to worry about updating the context ID
> + * in OACONTOL on the fly we make sure to pin the context
> + * upfront for the lifetime of the stream...
> + */
> + vma = stream->ctx->engine[RCS].state;
There's a caveat here that suggests I had better wrap this up into its
own function. (We need to flush dirty cachelines to memory on first
binding of the context.)
> + ret = i915_vma_pin(vma, 0, stream->ctx->ggtt_alignment,
> + PIN_GLOBAL | PIN_HIGH);
> + if (ret)
> + return ret;
Oops — this error path returns with struct_mutex still held (taken via
i915_mutex_lock_interruptible above); it needs to unlock before returning.
> +
> + dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);
> +
> + mutex_unlock(&dev_priv->drm.struct_mutex);
> +
> + return 0;
> +}
> +static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
> +{
> + struct drm_i915_gem_object *bo;
> + struct i915_vma *vma;
> + int ret;
> +
> + BUG_ON(dev_priv->perf.oa.oa_buffer.vma);
> +
> + ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> + if (ret)
> + return ret;
> +
> + BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
> + BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
> +
> + bo = i915_gem_object_create(&dev_priv->drm, OA_BUFFER_SIZE);
> + if (IS_ERR(bo)) {
> + DRM_ERROR("Failed to allocate OA buffer\n");
> + ret = PTR_ERR(bo);
> + goto unlock;
> + }
> +
> + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> + if (ret)
> + goto err_unref;
> +
> + /* PreHSW required 512K alignment, HSW requires 16M */
> + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, PIN_MAPPABLE);
Does this need mappable aperture space for OA? You aren't accessing it
via the aperture, but is the hw limited to it?
> + if (IS_ERR(vma)) {
> + ret = PTR_ERR(vma);
> + goto err_unref;
> + }
> + dev_priv->perf.oa.oa_buffer.vma = vma;
> +
> + dev_priv->perf.oa.oa_buffer.vaddr =
> + i915_gem_object_pin_map(bo, I915_MAP_WB);
> + if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
> + ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
> + goto err_unpin;
> + }
> +
> + dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);
> +
> + DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p",
> + i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
> + dev_priv->perf.oa.oa_buffer.vaddr);
> +
> + goto unlock;
> +
> +err_unpin:
> + __i915_vma_unpin(vma);
> +
> +err_unref:
> + i915_gem_object_put(bo);
> +
> + dev_priv->perf.oa.oa_buffer.vaddr = NULL;
> + dev_priv->perf.oa.oa_buffer.vma = NULL;
> +
> +unlock:
> + mutex_unlock(&dev_priv->drm.struct_mutex);
> + return ret;
> +}
> + if (ret >= 0) {
> + /* Maybe make ->pollin per-stream state if we support multiple
> + * concurrent streams in the future. */
> + atomic_set(&dev_priv->perf.oa.pollin, false);
> + }
> +
> return ret;
> }
>
> -static unsigned int i915_perf_poll_locked(struct i915_perf_stream *stream,
> +static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> +{
> + struct drm_i915_private *dev_priv =
> + container_of(hrtimer, typeof(*dev_priv),
> + perf.oa.poll_check_timer);
> +
> + if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
> + atomic_set(&dev_priv->perf.oa.pollin, true);
> + wake_up(&dev_priv->perf.oa.poll_wq);
> + }
> +
> + hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
> +
> + return HRTIMER_RESTART;
> +}
> +
> +static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
> + struct i915_perf_stream *stream,
> struct file *file,
> poll_table *wait)
> {
> - unsigned int streams = 0;
> + unsigned int events = 0;
>
> stream->ops->poll_wait(stream, file, wait);
>
> - if (stream->ops->can_read(stream))
> - streams |= POLLIN;
> + /* Note: we don't explicitly check whether there's something to read
> + * here since this path may be very hot depending on what else
> + * userspace is polling, or on the timeout in use. We rely solely on
> + * the hrtimer/oa_poll_check_timer_cb to notify us when there are
> + * samples to read.
> + */
> + if (atomic_read(&dev_priv->perf.oa.pollin))
> + events |= POLLIN;
The atomic_set() and atomic_read() are superfluous, they don't even
impose any memory barriers. The required barrier here is from wake_up().
You can just use dev_priv->perf.oa.pollin = true; WRITE_ONCE() /
READ_ONCE() if you want to clearly show that it is outside of the lock
and barriers are imposed elsewhere.
> @@ -285,18 +1177,18 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
> + /* we avoid simply assigning stream->sample_flags = props->sample_flags
> + * to have _stream_init check the combination of sample flags more
> + * thoroughly, but still this is the expected result at this point.
> */
> - DRM_ERROR("Unsupported i915 perf stream configuration\n");
> - ret = -EINVAL;
> - goto err_alloc;
> + BUG_ON(stream->sample_flags != props->sample_flags);
if (WARN_ON(...)) {
ret = -ENODEV;
goto err_alloc;
}
just to avoid checkpatch complaining.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list