[Intel-gfx] [PATCH 14/18] drm/i915: Forcefully flush GuC log buffer on reset

Tue Aug 16 05:25:52 UTC 2016

On 8/15/2016 9:18 PM, Tvrtko Ursulin wrote:
>
> On 15/08/16 15:49, akash.goel at intel.com wrote:
>> From: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>>
>> Before capturing the GuC logs as a part of error state, there should be a
>> force log buffer flush action sent to GuC before proceeding with GPU
>> reset
>> and re-initializing GUC. There could be some data in the log buffer which
>> is yet to be captured and those logs would be particularly useful to
>> understand that why the GPU reset was initiated.
>>
>> v2:
>> - Avoid the wait via flush_work, to serialize against an ongoing log
>>    buffer flush, from the error state capture path. (Chris)
>
> Could you explain if the patch does anything now that the flush has been
> removed?
>
flush_work for the regular log buffer flush work item has been removed
but the forceful command is still sent to GuC.

> In fact I don't even understand what it was doing before. :)
>
I am sorry for that.

> If the idea is to send a flush command to GuC so it can raise an
> interrupt for a partially full buffer,
Yes exactly this is the idea.

> then i915_guc_flush_logs should
> send the flush command and wait for that interrupt/work.
>
> But the function is first waiting for the work item to complete and then
> sending the flush command to the GuC. So I am confused.
>
Actually GuC firmware just ignores the forceful flush command received 
from Host, if it sees there is a pending request for regular log buffer
flush, for which it hasn't received the ack.

So that's why from Host side, before sending the forceful flush command 
to GuC, had to first wait for the regular log buffer flush work item to
finish execution.

Best regards
Akash

> Regards,
>
> Tvrtko
>
>> - Rebase.
>>
>> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>> Signed-off-by: Akash Goel <akash.goel at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gpu_error.c      |  2 ++
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 30
>> ++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_guc.h           |  1 +
>>   3 files changed, 33 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c
>> b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 94297aa..b73c671 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -1301,6 +1301,8 @@ static void
>> i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
>>       if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0))
>>           return;
>>
>> +    i915_guc_flush_logs(dev_priv, false);
>> +
>>       error->guc_log = i915_error_object_create(dev_priv,
>>                             dev_priv->guc.log.vma);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index b8d6313..85df2f3 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -185,6 +185,16 @@ static int
>> host2guc_logbuffer_flush_complete(struct intel_guc *guc)
>>       return host2guc_action(guc, data, 1);
>>   }
>>
>> +static int host2guc_force_logbuffer_flush(struct intel_guc *guc)
>> +{
>> +    u32 data[2];
>> +
>> +    data[0] = HOST2GUC_ACTION_FORCE_LOG_BUFFER_FLUSH;
>> +    data[1] = 0;
>> +
>> +    return host2guc_action(guc, data, 2);
>> +}
>> +
>>   /*
>>    * Initialise, update, or clear doorbell data shared with the GuC
>>    *
>> @@ -1536,6 +1546,26 @@ void i915_guc_capture_logs(struct
>> drm_i915_private *dev_priv)
>>       intel_runtime_pm_put(dev_priv);
>>   }
>>
>> +void i915_guc_flush_logs(struct drm_i915_private *dev_priv, bool
>> can_wait)
>> +{
>> +    if (!i915.enable_guc_submission || (i915.guc_log_level < 0))
>> +        return;
>> +
>> +    /* First disable the interrupts, will be renabled afterwards */
>> +    gen9_disable_guc_interrupts(dev_priv);
>> +
>> +    /* Before initiating the forceful flush, wait for any
>> pending/ongoing
>> +     * flush to complete otherwise forceful flush may not happen, but
>> wait
>> +     * can't be done for some paths like error state capture in which
>> case
>> +     * take a chance & directly attempt the forceful flush.
>> +     */
>> +    if (can_wait)
>> +        flush_work(&dev_priv->guc.log.flush_work);
>> +
>> +    /* Ask GuC to update the log buffer state */
>> +    host2guc_force_logbuffer_flush(&dev_priv->guc);
>> +}
>> +
>>   void i915_guc_unregister(struct drm_i915_private *dev_priv)
>>   {
>>       if (!i915.enable_guc_submission)
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 8598f38..d7eda42 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -182,6 +182,7 @@ int i915_guc_wq_check_space(struct
>> drm_i915_gem_request *rq);
>>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
>>   void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
>> +void i915_guc_flush_logs(struct drm_i915_private *dev_priv, bool
>> can_wait);
>>   void i915_guc_register(struct drm_i915_private *dev_priv);
>>   void i915_guc_unregister(struct drm_i915_private *dev_priv);
>>
>>