[Intel-gfx] [PATCH 05/13] drm/i915: Cache LRCA in the context
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Jan 12 01:59:03 PST 2016
On 11/01/16 19:41, Dave Gordon wrote:
> On 11/01/16 08:38, Daniel Vetter wrote:
>> On Fri, Jan 08, 2016 at 11:29:44AM +0000, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>>
>>> We are not allowed to call i915_gem_obj_ggtt_offset from irq
>>> context without the big kernel lock held.
>>>
>>> LRCA lifetime is well defined so cache it so it can be looked up
>>> cheaply from the interrupt context and at command submission
>>> time.
>>>
>>> v2: Added irq context reasoning to the commit message. (Daniel Vetter)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> A i915_obj_for_each_vma macro with a
>> WARN_ON(!mutex_is_locked(dev->struct_mutex)) would be awesome to validate
>> this. Especially since this is by far not the only time I've seen this
>> bug. Needs to be a follow-up though to avoid stalling this fix.
>>
>>> ---
>>> drivers/gpu/drm/i915/i915_debugfs.c | 15 ++++++--------
>>> drivers/gpu/drm/i915/i915_drv.h | 1 +
>>> drivers/gpu/drm/i915/intel_lrc.c | 40
>>> ++++++++++++++++---------------------
>>> drivers/gpu/drm/i915/intel_lrc.h | 3 ++-
>>> 4 files changed, 26 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 3b05bd189eab..714a45cf8a51 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -1976,12 +1976,13 @@ static int i915_context_status(struct
>>> seq_file *m, void *unused)
>>> }
>>>
>>> static void i915_dump_lrc_obj(struct seq_file *m,
>>> - struct intel_engine_cs *ring,
>>> - struct drm_i915_gem_object *ctx_obj)
>>> + struct intel_context *ctx,
>>> + struct intel_engine_cs *ring)
>>> {
>>> struct page *page;
>>> uint32_t *reg_state;
>>> int j;
>>> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> unsigned long ggtt_offset = 0;
>>>
>>> if (ctx_obj == NULL) {
>>> @@ -1991,7 +1992,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>>> }
>>>
>>> seq_printf(m, "CONTEXT: %s %u\n", ring->name,
>>> - intel_execlists_ctx_id(ctx_obj));
>>> + intel_execlists_ctx_id(ctx, ring));
>>>
>>> if (!i915_gem_obj_ggtt_bound(ctx_obj))
>>> seq_puts(m, "\tNot bound in GGTT\n");
>>> @@ -2040,8 +2041,7 @@ static int i915_dump_lrc(struct seq_file *m,
>>> void *unused)
>>> list_for_each_entry(ctx, &dev_priv->context_list, link) {
>>> for_each_ring(ring, dev_priv, i) {
>>> if (ring->default_context != ctx)
>>> - i915_dump_lrc_obj(m, ring,
>>> - ctx->engine[i].state);
>>> + i915_dump_lrc_obj(m, ctx, ring);
>>> }
>>> }
>>>
>>> @@ -2115,11 +2115,8 @@ static int i915_execlists(struct seq_file *m,
>>> void *data)
>>>
>>> seq_printf(m, "\t%d requests in queue\n", count);
>>> if (head_req) {
>>> - struct drm_i915_gem_object *ctx_obj;
>>> -
>>> - ctx_obj = head_req->ctx->engine[ring_id].state;
>>> seq_printf(m, "\tHead request id: %u\n",
>>> - intel_execlists_ctx_id(ctx_obj));
>>> + intel_execlists_ctx_id(head_req->ctx, ring));
>>> seq_printf(m, "\tHead request tail: %u\n",
>>> head_req->tail);
>>> }
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 8cf655c6fc03..b77a5d84eac2 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -881,6 +881,7 @@ struct intel_context {
>>> struct drm_i915_gem_object *state;
>>> struct intel_ringbuffer *ringbuf;
>>> int pin_count;
>>> + u32 lrca;
>>
>> lrc_offset imo. Consistent with our other usage in the driver, and
>> actually readable. Please apply liberally everywhere else (I know that
>> bspec calls it lrca, but we don't need to follow bad naming styles
>> blindly).
>>
>> With that Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
>>
>>> } engine[I915_NUM_RINGS];
>>>
>>> struct list_head link;
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index ff146a15d395..ffe004de22b0 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -265,7 +265,8 @@ int intel_sanitize_enable_execlists(struct
>>> drm_device *dev, int enable_execlists
>>>
>>> /**
>>> * intel_execlists_ctx_id() - get the Execlists Context ID
>>> - * @ctx_obj: Logical Ring Context backing object.
>>> + * @ctx: Context to get the ID for
>>> + * @ring: Engine to get the ID for
>>> *
>>> * Do not confuse with ctx->id! Unfortunately we have a name overload
>>> * here: the old context ID we pass to userspace as a handler so that
>>> @@ -275,14 +276,12 @@ int intel_sanitize_enable_execlists(struct
>>> drm_device *dev, int enable_execlists
>>> *
>>> * Return: 20-bits globally unique context ID.
>>> */
>>> -u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
>>> +u32 intel_execlists_ctx_id(struct intel_context *ctx,
>>> + struct intel_engine_cs *ring)
>>> {
>>> - u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
>>> - LRC_PPHWSP_PN * PAGE_SIZE;
>>> -
>>> /* LRCA is required to be 4K aligned so the more significant 20
>>> bits
>>> * are globally unique */
>>> - return lrca >> 12;
>>> + return ctx->engine[ring->id].lrca >> 12;
>>> }
>>>
>>> static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
>>> @@ -297,13 +296,11 @@ static bool disable_lite_restore_wa(struct
>>> intel_engine_cs *ring)
>>> uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>>> struct intel_engine_cs *ring)
>>> {
>>> - struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> + uint64_t lrca = ctx->engine[ring->id].lrca;
>>> uint64_t desc = ring->ctx_desc_template;
>>> - uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
>>> - LRC_PPHWSP_PN * PAGE_SIZE;
>>>
>>> desc |= lrca;
>>> - desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
>>> + desc |= (u64)intel_execlists_ctx_id(ctx, ring) <<
>>> GEN8_CTX_ID_SHIFT;
>>>
>>> return desc;
>>> }
>>> @@ -461,9 +458,7 @@ static bool execlists_check_remove_request(struct
>>> intel_engine_cs *ring,
>>> execlist_link);
>>>
>>> if (head_req != NULL) {
>>> - struct drm_i915_gem_object *ctx_obj =
>>> - head_req->ctx->engine[ring->id].state;
>>> - if (intel_execlists_ctx_id(ctx_obj) == request_id) {
>>> + if (intel_execlists_ctx_id(head_req->ctx, ring) ==
>>> request_id) {
>>> WARN(head_req->elsp_submitted == 0,
>>> "Never submitted head request\n");
>>>
>>> @@ -1023,15 +1018,17 @@ int logical_ring_flush_all_caches(struct
>>> drm_i915_gem_request *req)
>>> }
>>>
>>> static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
>>> - struct drm_i915_gem_object *ctx_obj,
>>> - struct intel_ringbuffer *ringbuf)
>>> + struct intel_context *ctx)
>>> {
>>> struct drm_device *dev = ring->dev;
>>> struct drm_i915_private *dev_priv = dev->dev_private;
>>> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>>> u64 lrca;
>>> - int ret = 0;
>>> + int ret;
>>>
>>> WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> +
>>> ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
>>> PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>>> if (ret)
>>> @@ -1047,6 +1044,7 @@ static int intel_lr_context_do_pin(struct
>>> intel_engine_cs *ring,
>>> if (ret)
>>> goto unpin_ctx_obj;
>>>
>>> + ctx->engine[ring->id].lrca = lrca;
>>> ctx_obj->dirty = true;
>>>
>>> /* Invalidate GuC TLB. */
>>> @@ -1065,11 +1063,9 @@ static int intel_lr_context_pin(struct
>>> drm_i915_gem_request *rq)
>>> {
>>> int ret = 0;
>>> struct intel_engine_cs *ring = rq->ring;
>>> - struct drm_i915_gem_object *ctx_obj =
>>> rq->ctx->engine[ring->id].state;
>>> - struct intel_ringbuffer *ringbuf = rq->ringbuf;
>>>
>>> if (rq->ctx->engine[ring->id].pin_count++ == 0) {
>>> - ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
>>> + ret = intel_lr_context_do_pin(ring, rq->ctx);
>>> if (ret)
>>> goto reset_pin_count;
>>> }
>>> @@ -1091,6 +1087,7 @@ void intel_lr_context_unpin(struct
>>> drm_i915_gem_request *rq)
>>> if (--rq->ctx->engine[ring->id].pin_count == 0) {
>>> intel_unpin_ringbuffer_obj(ringbuf);
>>> i915_gem_object_ggtt_unpin(ctx_obj);
>>> + rq->ctx->engine[ring->id].lrca = 0;
>>> }
>>> }
>>> }
>>> @@ -1960,10 +1957,7 @@ static int logical_ring_init(struct drm_device
>>> *dev, struct intel_engine_cs *rin
>>> goto error;
>>>
>>> /* As this is the default context, always pin it */
>>> - ret = intel_lr_context_do_pin(
>>> - ring,
>>> - ring->default_context->engine[ring->id].state,
>>> - ring->default_context->engine[ring->id].ringbuf);
>>> + ret = intel_lr_context_do_pin(ring, ring->default_context);
>>> if (ret) {
>>> DRM_ERROR(
>>> "Failed to pin and map ringbuffer %s: %d\n",
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h
>>> b/drivers/gpu/drm/i915/intel_lrc.h
>>> index de41ad6cd63d..f9db5f187d77 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -106,6 +106,8 @@ void intel_lr_context_reset(struct drm_device *dev,
>>> struct intel_context *ctx);
>>> uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>>> struct intel_engine_cs *ring);
>>> +u32 intel_execlists_ctx_id(struct intel_context *ctx,
>>> + struct intel_engine_cs *ring);
>>>
>>> /* Execlists */
>>> int intel_sanitize_enable_execlists(struct drm_device *dev, int
>>> enable_execlists);
>>> @@ -113,7 +115,6 @@ struct i915_execbuffer_params;
>>> int intel_execlists_submission(struct i915_execbuffer_params *params,
>>> struct drm_i915_gem_execbuffer2 *args,
>>> struct list_head *vmas);
>>> -u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
>>>
>>> void intel_lrc_irq_handler(struct intel_engine_cs *ring);
>>> void intel_execlists_retire_requests(struct intel_engine_cs *ring);
>>> --
>>> 1.9.1
>>>
>>> _______________________________________________
>>> Intel-gfx mailing list
>>> Intel-gfx at lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> How about caching the WHOLE descriptor so that retrieval becomes
> trivial, retrieving the context ID becomes almost as trivial, as
> does retrieving the LRCA (all these could be inlined) and the single
> calculate-and-cache function can be nicely documented as to how a
> descriptor encodes LRCA etc. Something like this ...
Similar to v3 of my series from yesterday, since Daniel had already suggested
caching it all. :)
But I think moving towards storing the VMA will be the approach unless
we can agree on the I915_GTT_BAD_ADDRESS idea.
> /**
> * intel_lr_context_descriptor_update() - calculate & cache the
> * descriptor for a pinned
> * context
> * @ctx: Context to work on
> * @ring: Engine the descriptor will be used with
> *
> * The context descriptor encodes various attributes of a context,
> * including its GTT address and some flags. Because it's fairly
> * expensive to calculate, we'll just do it once and cache the result,
> * which remains valid until the context is unpinned.
> *
> * This is what a descriptor looks like, from LSB to MSB:
> * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template)
> * bits 12-31: LRCA, GTT address of (the HWSP of) this context
> * bits 32-51: ctx ID, a globally unique tag (the LRCA again!)
> * bits 52-63: reserved, may encode the engine ID (for GuC)
> */
> static void
> intel_lr_context_descriptor_update(struct intel_context *ctx,
> struct intel_engine_cs *ring)
> {
> struct drm_i915_gem_object *ctx_obj;
> uint64_t lrca, desc;
>
> ctx_obj = ctx->engine[ring->id].state;
> lrca = i915_gem_obj_ggtt_offset(ctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE;
>
> desc = ring->ctx_desc_template; /* bits 0-11 */
> desc |= lrca; /* bits 12-31 */
> desc |= (lrca >> PAGE_SHIFT) << 32; /* bits 32-51 */
>
> ctx->engine[ring->id].ctx_desc = desc;
> }
>
> /**
> * intel_lr_context_descriptor() - get the descriptor for the specified
> * context/engine combination
> * @ctx: Context to get the ID for
> * @ring: Engine to get the ID for
> *
> * Now just a trivial accessor, since the value was computed and cached
> * above at the point when the context was pinned.
> *
> * Return: 64-bit context descriptor (encodes LRCA and various flags)
> */
> uint64_t inline
> intel_lr_context_descriptor(struct intel_context *ctx,
> struct intel_engine_cs *ring)
> {
> return ctx->engine[ring->id].ctx_desc;
> }
>
> /**
> * intel_lr_context_address() - get the GTT address of (the HWSP of) the
> * specified context/engine combination
> * @ctx: Context to get the ID for
> * @ring: Engine to get the ID for
> *
> * The address (LRCA) is a portion of the context descriptor, so we can
> * just extract the required part from the cached descriptor.
> *
> * Return: 32-bit GTT address
> */
> uint32_t inline
> intel_lr_context_address(struct intel_context *ctx,
> struct intel_engine_cs *ring)
> {
> return (u32)intel_lr_context_descriptor(ctx, ring) & PAGE_MASK;
> }
>
> /**
> * intel_execlists_ctx_id() - get the Execlists Context ID
> * @ctx: Context to get the ID for
> * @ring: Engine to get the ID for
> *
> * Do not confuse with ctx->id! Unfortunately we have a name overload
> * here: the old context ID we pass to userspace as a handler so that
> * they can refer to a context, and the new context ID we pass to the
> * ELSP so that the GPU can inform us of the context status via
> * interrupts.
> *
> * The context ID is a portion of the context descriptor, so we can
> * just extract the required part from the cached descriptor.
> *
> * Return: 20-bit globally unique ctx ID (actually, a GTT page number)
> */
> uint32_t inline
> intel_execlists_ctx_id(struct intel_context *ctx,
> struct intel_engine_cs *ring)
> {
> return (u32)(intel_lr_context_descriptor(ctx, ring) >> 32);
> }
>
> ... and insert a call to intel_lr_context_descriptor_update() into the
> appropriate spot in intel_lr_context_do_pin().
Regards,
Tvrtko
More information about the Intel-gfx
mailing list