[Intel-gfx] [PATCH 06/17] drm/i915: Handle log buffer flush interrupt event from GuC

Wed Jul 20 03:29:11 UTC 2016

On 7/19/2016 4:28 PM, Tvrtko Ursulin wrote:
>
> On 10/07/16 14:41, akash.goel at intel.com wrote:
>> From: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>>
>> GuC ukernel sends an interrupt to Host to flush the log buffer
>> and expects Host to correspondingly update the read pointer
>> information in the state structure, once it has consumed the
>> log buffer contents by copying them to a file or buffer.
>> Even if Host couldn't copy the contents, it can still update the
>> read pointer so that logging state is not disturbed on GuC side.
>>
>> v2:
>> - Use a dedicated workqueue for handling flush interrupt. (Tvrtko)
>> - Reduce the overall log buffer copying time by skipping the copy of
>>    crash buffer area for regular cases and copying only the state
>>    structure data in first page.
>>
>> v3:
>>   - Create a vmalloc mapping of log buffer. (Chris)
>>   - Cover the flush acknowledgment under rpm get & put.(Chris)
>>   - Revert the change of skipping the copy of crash dump area, as
>>     not really needed, will be covered by subsequent patch.
>>
>> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>> Signed-off-by: Akash Goel <akash.goel at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.c            |  13 +++
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 148
>> +++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_irq.c            |   5 +-
>>   drivers/gpu/drm/i915/intel_guc.h           |   3 +
>>   4 files changed, 167 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index b9a8117..25c6b9b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -791,8 +791,20 @@ static int i915_workqueues_init(struct
>> drm_i915_private *dev_priv)
>>       if (dev_priv->hotplug.dp_wq == NULL)
>>           goto out_free_wq;
>>
>> +    if (HAS_GUC_SCHED(dev_priv)) {
>> +        /* Need a dedicated wq to process log buffer flush interrupts
>> +         * from GuC without much delay so as to avoid any loss of logs.
>> +         */
>> +        dev_priv->guc.log.wq =
>> +            alloc_ordered_workqueue("i915-guc_log", 0);
>> +        if (dev_priv->guc.log.wq == NULL)
>> +            goto out_free_hotplug_dp_wq;
>> +    }
>> +
>>       return 0;
>>
>> +out_free_hotplug_dp_wq:
>> +    destroy_workqueue(dev_priv->hotplug.dp_wq);
>>   out_free_wq:
>>       destroy_workqueue(dev_priv->wq);
>>   out_err:
>> @@ -803,6 +815,7 @@ out_err:
>>
>>   static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
>>   {
>> +    destroy_workqueue(dev_priv->guc.log.wq);
>
> I am ignoring the wq parts of the patch since the next series may look
> different in this respect.
>
> However you may need to have wq destruction under the same HAS_GUC_SCHED
> condition as when you create it.

Thanks, will do.
Sorry, my bad.
>
>>       destroy_workqueue(dev_priv->hotplug.dp_wq);
>>       destroy_workqueue(dev_priv->wq);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 0bac172..d3dbb8e 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -172,6 +172,15 @@ static int host2guc_sample_forcewake(struct
>> intel_guc *guc,
>>       return host2guc_action(guc, data, ARRAY_SIZE(data));
>>   }
>>
>> +static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
>> +{
>> +    u32 data[1];
>> +
>> +    data[0] = HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE;
>> +
>> +    return host2guc_action(guc, data, 1);
>> +}
>> +
>>   /*
>>    * Initialise, update, or clear doorbell data shared with the GuC
>>    *
>> @@ -825,6 +834,123 @@ err:
>>       return NULL;
>>   }
>>
>> +static void guc_move_to_next_buf(struct intel_guc *guc)
>> +{
>> +    return;
>> +}
>> +
>> +static void* guc_get_write_buffer(struct intel_guc *guc)
>> +{
>> +    return NULL;
>> +}
>> +
>> +static void guc_read_update_log_buffer(struct drm_device *dev)
>
> dev_priv should be passed in for driver internal functions.
>
>> +{
>> +    struct drm_i915_private *dev_priv = dev->dev_private;
>> +    struct intel_guc *guc = &dev_priv->guc;
>> +    struct guc_log_buffer_state *log_buffer_state,
>> *log_buffer_copy_state;
>> +    struct guc_log_buffer_state log_buffer_state_local;
>> +    void *src_data_ptr, *dst_data_ptr;
>> +    u32 i, buffer_size;
>> +
>> +    if (!guc->log.obj || !guc->log.buf_addr)
>> +        return;
>> +
>> +    log_buffer_state = src_data_ptr = guc->log.buf_addr;
>> +
>> +    /* Get the pointer to local buffer to store the logs */
>> +    dst_data_ptr = log_buffer_copy_state = guc_get_write_buffer(guc);
>
> This will return NULL so the loop below doesn't do anything much. I
> assume at this point in the patch series things are not wired up yet?
>
The loop below will still update the state structures, which lie in the
first page of the GuC log buffer.
There is no local buffer yet to store the logs.

>> +
>> +    /* Actual logs are present from the 2nd page */
>> +    src_data_ptr += PAGE_SIZE;
>> +    dst_data_ptr += PAGE_SIZE;
>> +
>> +    for (i = 0; i < GUC_MAX_LOG_BUFFER; i++) {
>> +        log_buffer_state_local = *log_buffer_state;
>> +        buffer_size = log_buffer_state_local.size;
>> +
>> +        if (log_buffer_copy_state) {
>> +            /* First copy the state structure */
>> +            memcpy(log_buffer_copy_state, &log_buffer_state_local,
>> +                    sizeof(struct guc_log_buffer_state));
>
> For some reason I find it hard to grasp the meaning of the different
> variables with slightly different names:
>
> log_buffer_state
This is the pointer to GuC log buffer.

> log_buffer_copy_state,
This is the pointer to the relay sub buffer; it may also be NULL if there
are no empty sub buffers left to store the logs.

> log_buffer_state_local
This is the local copy of state structure on the stack.
	log_buffer_state_local = *log_buffer_state;

This way the state structure fields in the GuC log buffer (which is
mapped uncached) are read only once.
Without the local copy, multiple accesses to the state structures in the
GuC log buffer would be required.
One access is required to copy the state structures from the GuC log buffer
to the relay sub buffer.
Then, for some bookkeeping, we have to (re)access some of the individual
fields inside the state structure (please refer to the subsequent patch
"drm/i915: Add stats for GuC log buffer flush interrupts").

>
> The _local one for example, it seems to be copied into from the GuC
> mapping and then copied again into log_buffer_copy_state.
>
> Why do you need two copies of it?
As mentioned above, log_buffer_copy_state could be NULL also.

> And why sometimes the code does a structure copy and sometimes explicit
> memcpy?
>
> I think even worse, when it does a structure copy:
>
> log_buffer_state_local = *log_buffer_state;
>
> The log_buffer_state is a GEM object mapping. So in my mind it would be
> better the do the explicit memcpy there.
>
> And then this memcpy between the local copy and write buffer, that will
> be into the relay sub buffer?
>
Fine, for consistency I will use only memcpy to make a local copy of the
state structures on the stack.
	memcpy(&log_buffer_state_local, log_buffer_state,
			sizeof(struct guc_log_buffer_state));

> It seems to me there is no real need for log_buffer_state_local since
> the bottom of the loop still accesses it via the GEM obj mapping
> (log_buffer_state)
>
> So don't know, I find it all confusing.
Sorry for all the confusion; I did it like this to make sure we read the
state structures in the GuC log buffer only once.

>> +
>> +            /* The write pointer could have been updated by the GuC
>> +             * firmware, after sending the flush interrupt to Host,
>> +             * for consistency set the write pointer value to same
>> +             * value of sampled_write_ptr in the snapshot buffer.
>> +             */
>> +            log_buffer_copy_state->write_ptr =
>> +                log_buffer_copy_state->sampled_write_ptr;
>> +
>> +            log_buffer_copy_state++;
>> +
>> +            /* Now copy the actual logs */
>> +            memcpy(dst_data_ptr, src_data_ptr, buffer_size);
>
> Doesn't this copy (and overwrite) the same location
> log_buffer_copy_state points to? So the memcpy above and the write_ptr
> update (why?) are then overwritten by this memcpy because dst_data_ptr
> == log_buffer_copy_state at this point unless I am missing something.
>
log_buffer_copy_state initially points to the start of first page, 
whereas dst_data_ptr initially points to the start of 2nd page (where 
the actual logs are present).

I think you missed the following increment, right before the loop,
	/* Actual logs are present from the 2nd page */
	src_data_ptr += PAGE_SIZE;
	dst_data_ptr += PAGE_SIZE;

>> +
>> +            src_data_ptr += buffer_size;
>> +            dst_data_ptr += buffer_size;
>> +        }
>> +
>> +        /* FIXME: invalidate/flush for log buffer needed */
>> +
>> +        /* Update the read pointer in the shared log buffer */
>> +        log_buffer_state->read_ptr =
>> +            log_buffer_state_local.sampled_write_ptr;
>> +
>> +        /* Clear the 'flush to file' flag */
>> +        log_buffer_state->flush_to_file = 0;
>> +        log_buffer_state++;
>> +    }
>> +
>> +    if (log_buffer_copy_state)
>> +        guc_move_to_next_buf(guc);
>> +}
>> +
>> +static void guc_log_cleanup(struct drm_i915_private *dev_priv)
>> +{
>> +    struct intel_guc *guc = &dev_priv->guc;
>> +
>> +    lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> +    if (i915.guc_log_level < 0)
>> +        return;
>> +
>> +    /* First disable the flush interrupt */
>> +    gen9_disable_guc_interrupts(dev_priv);
>> +
>> +    if (guc->log.buf_addr)
>> +        i915_gem_object_unpin_map(guc->log.obj);
>> +
>> +    guc->log.buf_addr = NULL;
>> +}
>> +
>> +static int guc_create_log_extras(struct intel_guc *guc)
>> +{
>> +    struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +    void *vaddr;
>> +    int ret;
>> +
>> +    lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> +    /* Nothing to do */
>> +    if (i915.guc_log_level < 0)
>> +        return 0;
>> +
>> +    if (!guc->log.buf_addr) {
>> +        /* Create a vmalloc mapping of log buffer pages */
>> +        vaddr = i915_gem_object_pin_map(guc->log.obj);
>> +        if (IS_ERR(vaddr)) {
>> +            ret = PTR_ERR(vaddr);
>> +            DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
>> +            return ret;
>> +        }
>> +
>> +        guc->log.buf_addr = vaddr;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static void guc_create_log(struct intel_guc *guc)
>>   {
>>       struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> @@ -851,6 +977,13 @@ static void guc_create_log(struct intel_guc *guc)
>>           }
>>
>>           guc->log.obj = obj;
>> +
>> +        if (guc_create_log_extras(guc)) {
>> +            gem_release_guc_obj(guc->log.obj);
>> +            guc->log.obj = NULL;
>> +            i915.guc_log_level = -1;
>> +            return;
>> +        }
>>       }
>>
>>       /* each allocated unit is a page */
>> @@ -1021,6 +1154,7 @@ void i915_guc_submission_fini(struct
>> drm_i915_private *dev_priv)
>>       gem_release_guc_obj(dev_priv->guc.ads_obj);
>>       guc->ads_obj = NULL;
>>
>> +    guc_log_cleanup(dev_priv);
>>       gem_release_guc_obj(dev_priv->guc.log.obj);
>>       guc->log.obj = NULL;
>>
>> @@ -1084,3 +1218,17 @@ int intel_guc_resume(struct drm_device *dev)
>>
>>       return host2guc_action(guc, data, ARRAY_SIZE(data));
>>   }
>> +
>> +void i915_guc_capture_logs(struct drm_device *dev)
>> +{
>> +    struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +    guc_read_update_log_buffer(dev);
>> +
>> +    /* Generally device is expected to be active only at this
>> +     * time, so get/put should be really quick.
>> +     */
>> +    intel_runtime_pm_get(dev_priv);
>> +    host2guc_logbuffer_flush_complete(&dev_priv->guc);
>> +    intel_runtime_pm_put(dev_priv);
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c
>> b/drivers/gpu/drm/i915/i915_irq.c
>> index fd73c94..f90d3c6 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -1221,7 +1221,7 @@ static void gen9_guc2host_events_work(struct
>> work_struct *work)
>>       }
>>       spin_unlock_irq(&dev_priv->irq_lock);
>>
>> -    /* TODO: Handle the events for which GuC interrupted host */
>> +    i915_guc_capture_logs(&dev_priv->drm);
>>   }
>>
>>   /**
>> @@ -1707,7 +1707,8 @@ static void gen9_guc_irq_handler(struct
>> drm_i915_private *dev_priv, u32 gt_iir)
>>                       I915_READ(SOFT_SCRATCH(15)) & ~msg);
>>
>>                   /* Handle flush interrupt event in bottom half */
>> -                queue_work(dev_priv->wq, &dev_priv->guc.events_work);
>> +                queue_work(dev_priv->guc.log.wq,
>> +                        &dev_priv->guc.events_work);
>>               }
>>           }
>>           spin_unlock(&dev_priv->irq_lock);
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 2663b41..d4f0fae 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -125,6 +125,8 @@ struct intel_guc_fw {
>>   struct intel_guc_log {
>>       uint32_t flags;
>>       struct drm_i915_gem_object *obj;
>> +    struct workqueue_struct *wq;
>> +    void *buf_addr;
>>   };
>>
>>   struct intel_guc {
>> @@ -171,5 +173,6 @@ int i915_guc_wq_check_space(struct
>> drm_i915_gem_request *rq);
>>   int i915_guc_submit(struct drm_i915_gem_request *rq);
>>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
>> +void i915_guc_capture_logs(struct drm_device *dev);
>>
>>   #endif
>>
>
> Regards,
>
> Tvrtko
>