[Intel-gfx] [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments

Dave Gordon david.s.gordon at intel.com
Tue Apr 19 16:38:49 UTC 2016


On 19/04/16 16:08, Tvrtko Ursulin wrote:
>
> On 19/04/16 12:45, Dave Gordon wrote:
>> Tidying up guc_init_ctx_desc() and adding commentary to the client
>> structure after the recent change in GuC page mapping strategy.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 38 ++++++++++++++----------------
>>   drivers/gpu/drm/i915/intel_guc.h           | 23 ++++++++++++++++++
>>   2 files changed, 41 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 135f094..5bbb13b 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -358,12 +358,14 @@ static void guc_init_proc_desc(struct intel_guc *guc,
>>   static void guc_init_ctx_desc(struct intel_guc *guc,
>>                     struct i915_guc_client *client)
>>   {
>> +    struct drm_i915_gem_object *client_obj = client->client_obj;
>>       struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>       struct intel_engine_cs *engine;
>>       struct intel_context *ctx = client->owner;
>>       struct guc_context_desc desc;
>>       struct sg_table *sg;
>>       enum intel_engine_id id;
>> +    u32 gfx_addr;
>>
>>       memset(&desc, 0, sizeof(desc));
>>
>> @@ -392,16 +394,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>>           lrc->context_desc = (u32)ctx_desc;
>>
>>           /* The state page is after PPHWSP */
>> -        lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
>> -                LRC_STATE_PN * PAGE_SIZE;
>> +        gfx_addr = i915_gem_obj_ggtt_offset(obj);
>> +        lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>>           lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
>>                   (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>>
>>           obj = ctx->engine[id].ringbuf->obj;
>> +        gfx_addr = i915_gem_obj_ggtt_offset(obj);
>>
>> -        lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
>> -        lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
>> -        lrc->ring_next_free_location = lrc->ring_begin;
>> +        lrc->ring_begin = gfx_addr;
>> +        lrc->ring_end = gfx_addr + obj->base.size - 1;
>> +        lrc->ring_next_free_location = gfx_addr;
>>           lrc->ring_current_tail_pointer_value = 0;
>>
>>           desc.engines_used |= (1 << engine->guc_id);
>> @@ -410,22 +413,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>>       WARN_ON(desc.engines_used == 0);
>>
>>       /*
>> -     * The CPU address is only needed at certain points, so kmap_atomic on
>> -     * demand instead of storing it in the ctx descriptor.
>> -     * XXX: May make debug easier to have it mapped
>> +     * The doorbell, process descriptor, and workqueue are all parts
>> +     * of the client object, which the GuC will reference via the GGTT
>>        */
>> -    desc.db_trigger_cpu = 0;
>> -    desc.db_trigger_uk = client->doorbell_offset +
>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>> -    desc.db_trigger_phy = client->doorbell_offset +
>> -        sg_dma_address(client->client_obj->pages->sgl);
>> -
>> -    desc.process_desc = client->proc_desc_offset +
>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>> -
>> -    desc.wq_addr = client->wq_offset +
>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>> -
>> +    gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
>> +    desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
>> +                client->doorbell_offset;
>> +    desc.db_trigger_cpu = (uintptr_t)client->client_base +
>> +                client->doorbell_offset;
>
> This changed from zero to the address. Was it buggy before?

It wasn't buggy as such - the field isn't actually used, AFAICT - but we 
might as well set it to the correct value anyway. This data is shared 
with the GuC, so I'd like the values to be as complete and correct as 
possible in case the firmware ever starts making use of them. We could 
also use it for consistency checking if we ever see mysterious page 
faults in the doorbell code. But really it's just to show that these 
three fields all describe the same thing, namely where to ring the 
doorbell, in physical (DMA), kernel, and GGTT address spaces respectively.
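
Spelling it out with a fragment (illustrative only, not part of the 
patch; the local names here are made up, but the accessors are the same 
ones the patch already uses inside guc_init_ctx_desc()):

	u32 db_off = client->doorbell_offset;

	/* kernel (CPU) view: through the permanently-kmap'd first page */
	void *db_cpu = client->client_base + db_off;

	/* GuC (GGTT) view: offset from the client object's GGTT mapping */
	u32 db_uk = i915_gem_obj_ggtt_offset(client->client_obj) + db_off;

	/* physical (DMA) view: from the first sg entry of the client object */
	dma_addr_t db_phy = sg_dma_address(client->client_obj->pages->sgl) + db_off;

Those are exactly the values the patch stores in db_trigger_cpu, 
db_trigger_uk, and db_trigger_phy respectively.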

.Dave.

>> +    desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
>> +    desc.process_desc = gfx_addr + client->proc_desc_offset;
>> +    desc.wq_addr = gfx_addr + client->wq_offset;
>>       desc.wq_size = client->wq_size;
>>
>>       /*
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
>> index 19ca593..c503ccd 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -29,6 +29,29 @@
>>
>>   struct drm_i915_gem_request;
>>
>> +/*
>> + * This structure primarily describes the GEM object shared with the GuC.
>> + * The GEM object is held for the entire lifetime of our interaction with
>> + * the GuC, being allocated before the GuC is loaded with its firmware.
>> + * Because there's no way to update the address used by the GuC after
>> + * initialisation, the shared object must stay pinned into the GGTT as
>> + * long as the GuC is in use. We also keep the first page (only) mapped
>> + * into kernel address space, as it includes shared data that must be
>> + * updated on every request submission.
>> + *
>> + * The single GEM object described here is actually made up of several
>> + * separate areas, as far as the GuC is concerned. The first page (kept
>> + * kmap'd) includes the "process decriptor" which holds sequence data for
>
> Typo - descriptor. :) (spotted by Thunderbird)
>
>> + * the doorbell, and one cacheline which actually *is* the doorbell; a
>> + * write to this will "ring" the doorbell (i.e. send an interrupt to the
>> + * GuC). The subsequent pages of the client object constitute the work
>> + * queue (a circular array of work items), again described in the process
>> + * descriptor. Work queue pages are mapped momentarily as required.
>> + *
>> + * Finally, we also keep a few statistics here, including the number of
>> + * submissions to each engine, and a record of the last submission failure
>> + * (if any).
>> + */
>>   struct i915_guc_client {
>>       struct drm_i915_gem_object *client_obj;
>>       void *client_base;        /* first page (only) of above    */
>>
>
> Looks fine, only question mark over desc.db_trigger_cpu is preventing r-b.
>
> Regards,
>
> Tvrtko



