[Intel-gfx] [PATCH 08/18] drm/i915: Add a relay backed debugfs interface for capturing GuC logs

Goel, Akash akash.goel at intel.com
Mon Aug 15 16:02:14 UTC 2016



On 8/15/2016 8:59 PM, Tvrtko Ursulin wrote:
>
> On 15/08/16 15:49, akash.goel at intel.com wrote:
>> From: Akash Goel <akash.goel at intel.com>
>>
>> Added a new debugfs interface '/sys/kernel/debug/dri/0/guc_log' for the
>> User to capture GuC firmware logs. Availed relay framework to implement
>> the interface, where Driver will have to just use a relay API to store
>> snapshots of the GuC log buffer in the buffer managed by relay.
>> The snapshot will be taken when GuC firmware sends a log buffer flush
>> interrupt and up to four snapshots could be stored in the relay buffer.
>> The relay buffer will be operated in a mode where it will overwrite the
>> data not yet collected by User.
>> Besides mmap method, through which User can directly access the relay
>> buffer contents, relay also supports the 'poll' method. Through the
>> 'poll'
>> call on log file, User can come to know whenever a new snapshot of the
>> log buffer is taken by Driver, so can run in tandem with the Driver and
>> capture the logs in a sustained/streaming manner, without any loss of
>> data.
>>
>> v2: Defer the creation of relay channel & associated debugfs file, as
>>      debugfs setup is now done at the end of i915 Driver load. (Chris)
>>
>> v3:
>> - Switch to no-overwrite mode for relay.
>> - Fix the relay sub buffer switching sequence.
>>
>> v4:
>> - Update i915 Kconfig to select RELAY config. (TvrtKo)
>> - Log a message when there is no sub buffer available to capture
>>    the GuC log buffer. (Tvrtko)
>> - Increase the number of relay sub buffers to 8 from 4, to have
>>    sufficient buffering for boot time logs
>>
>> v5:
>> - Fix the alignment, indentation issues and some minor cleanup. (Tvrtko)
>> - Update the comment to elaborate on why a relay channel has to be
>>    associated with the debugfs file. (Tvrtko)
>>
>> Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
>> Signed-off-by: Akash Goel <akash.goel at intel.com>
>> ---
>>   drivers/gpu/drm/i915/Kconfig               |   1 +
>>   drivers/gpu/drm/i915/i915_drv.c            |   2 +
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 211
>> ++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/i915/intel_guc.h           |   3 +
>>   4 files changed, 215 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> index 7769e46..fc900d2 100644
>> --- a/drivers/gpu/drm/i915/Kconfig
>> +++ b/drivers/gpu/drm/i915/Kconfig
>> @@ -11,6 +11,7 @@ config DRM_I915
>>       select DRM_KMS_HELPER
>>       select DRM_PANEL
>>       select DRM_MIPI_DSI
>> +    select RELAY
>>       # i915 depends on ACPI_VIDEO when ACPI is enabled
>>       # but for select to work, need to select ACPI_VIDEO's
>> dependencies, ick
>>       select BACKLIGHT_LCD_SUPPORT if ACPI
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 13ae340..cdee60b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -1133,6 +1133,7 @@ static void i915_driver_register(struct
>> drm_i915_private *dev_priv)
>>       /* Reveal our presence to userspace */
>>       if (drm_dev_register(dev, 0) == 0) {
>>           i915_debugfs_register(dev_priv);
>> +        i915_guc_register(dev_priv);
>>           i915_setup_sysfs(dev);
>>       } else
>>           DRM_ERROR("Failed to register driver for userspace access!\n");
>> @@ -1171,6 +1172,7 @@ static void i915_driver_unregister(struct
>> drm_i915_private *dev_priv)
>>       intel_opregion_unregister(dev_priv);
>>
>>       i915_teardown_sysfs(&dev_priv->drm);
>> +    i915_guc_unregister(dev_priv);
>>       i915_debugfs_unregister(dev_priv);
>>       drm_dev_unregister(&dev_priv->drm);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 2b27b87..9b1054c 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -23,6 +23,8 @@
>>    */
>>   #include <linux/firmware.h>
>>   #include <linux/circ_buf.h>
>> +#include <linux/debugfs.h>
>> +#include <linux/relay.h>
>>   #include "i915_drv.h"
>>   #include "intel_guc.h"
>>
>> @@ -837,13 +839,159 @@ err:
>>       return NULL;
>>   }
>>
>> +/*
>> + * Sub buffer switch callback. Called whenever relay has to switch to
>> a new
>> + * sub buffer, relay stays on the same sub buffer if 0 is returned.
>> + */
>> +static int subbuf_start_callback(struct rchan_buf *buf,
>> +                 void *subbuf,
>> +                 void *prev_subbuf,
>> +                 size_t prev_padding)
>> +{
>> +    /* Use no-overwrite mode by default, where relay will stop accepting
>> +     * new data if there are no empty sub buffers left.
>> +     * There is no strict synchronization enforced by relay between
>> Consumer
>> +     * and Producer. In overwrite mode, there is a possibility of
>> getting
>> +     * inconsistent/garbled data, the producer could be writing on to
>> the
>> +     * same sub buffer from which Consumer is reading. This can't be
>> avoided
>> +     * unless Consumer is fast enough and can always run in tandem with
>> +     * Producer.
>> +     */
>> +    if (relay_buf_full(buf))
>> +        return 0;
>> +
>> +    return 1;
>> +}
>> +
>> +/*
>> + * file_create() callback. Creates relay file in debugfs.
>> + */
>> +static struct dentry *create_buf_file_callback(const char *filename,
>> +                           struct dentry *parent,
>> +                           umode_t mode,
>> +                           struct rchan_buf *buf,
>> +                           int *is_global)
>> +{
>> +    struct dentry *buf_file;
>> +
>> +    /* This to enable the use of a single buffer for the relay
>> channel and
>> +     * correspondingly have a single file exposed to User, through which
>> +     * it can collect the logs in order without any post-processing.
>> +     */
>> +    *is_global = 1;
>> +
>> +    if (!parent)
>> +        return NULL;
>
> Should writing to is_global be after this check?
>
We need to set 'is_global' even if parent is NULL, in order to support
early logging, but yes, that change could have been made in the last patch.

In this patch the assignment can be placed after the NULL check, and the
last patch will then move it before the check.

Best regards
Akash

>> +
>> +    /* Not using the channel filename passed as an argument, since
>> for each
>> +     * channel relay appends the corresponding CPU number to the
>> filename
>> +     * passed in relay_open(). This should be fine as relay just needs a
>> +     * dentry of the file associated with the channel buffer and that
>> file's
>> +     * name need not be same as the filename passed as an argument.
>> +     */
>> +    buf_file = debugfs_create_file("guc_log", mode,
>> +                       parent, buf, &relay_file_operations);
>> +    return buf_file;
>> +}
>> +
>> +/*
>> + * file_remove() default callback. Removes relay file in debugfs.
>> + */
>> +static int remove_buf_file_callback(struct dentry *dentry)
>> +{
>> +    debugfs_remove(dentry);
>> +    return 0;
>> +}
>> +
>> +/* relay channel callbacks */
>> +static struct rchan_callbacks relay_callbacks = {
>> +    .subbuf_start = subbuf_start_callback,
>> +    .create_buf_file = create_buf_file_callback,
>> +    .remove_buf_file = remove_buf_file_callback,
>> +};
>> +
>> +static void guc_remove_log_relay_file(struct intel_guc *guc)
>> +{
>> +    relay_close(guc->log.relay_chan);
>> +}
>> +
>> +static int guc_create_log_relay_file(struct intel_guc *guc)
>> +{
>> +    struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +    struct rchan *guc_log_relay_chan;
>> +    struct dentry *log_dir;
>> +    size_t n_subbufs, subbuf_size;
>> +
>> +    /* For now create the log file in /sys/kernel/debug/dri/0 dir */
>> +    log_dir = dev_priv->drm.primary->debugfs_root;
>> +
>> +    /* If /sys/kernel/debug/dri/0 location do not exist, then debugfs is
>> +     * not mounted and so can't create the relay file.
>> +     * The relay API seems to fit well with debugfs only, for
>> availing relay
>> +     * there are 3 requirements which can be met for debugfs file
>> only in a
>> +     * straightforward/clean manner :-
>> +     * i)   Need the associated dentry pointer of the file, while
>> opening the
>> +     *      relay channel.
>> +     * ii)  Should be able to use 'relay_file_operations' fops for
>> the file.
>> +     * iii) Set the 'i_private' field of file's inode to the pointer of
>> +     *    relay channel buffer.
>> +     */
>> +    if (!log_dir) {
>> +        DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
>> +        return -ENODEV;
>> +    }
>> +
>> +    /* Keep the size of sub buffers same as shared log buffer */
>> +    subbuf_size = guc->log.vma->obj->base.size;
>> +
>> +    /* Store up to 8 snapshots, which is large enough to buffer
>> sufficient
>> +     * boot time logs and provides enough leeway to User, in terms of
>> +     * latency, for consuming the logs from relay. Also doesn't take
>> +     * up too much memory.
>> +     */
>> +    n_subbufs = 8;
>> +
>> +    guc_log_relay_chan = relay_open("guc_log", log_dir, subbuf_size,
>> +                    n_subbufs, &relay_callbacks, dev_priv);
>> +    if (!guc_log_relay_chan) {
>> +        DRM_ERROR("Couldn't create relay chan for GuC logging\n");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    /* FIXME: Cover the update under a lock ? */
>> +    guc->log.relay_chan = guc_log_relay_chan;
>> +    return 0;
>> +}
>> +
>>   static void guc_move_to_next_buf(struct intel_guc *guc)
>>   {
>> +    /* Make sure the updates made in the sub buffer are visible when
>> +     * Consumer sees the following update to offset inside the sub
>> buffer.
>> +     */
>> +    smp_wmb();
>> +
>> +    /* All data has been written, so now move the offset of sub
>> buffer. */
>> +    relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size);
>> +
>> +    /* Switch to the next sub buffer */
>> +    relay_flush(guc->log.relay_chan);
>>   }
>>
>>   static void *guc_get_write_buffer(struct intel_guc *guc)
>>   {
>> -    return NULL;
>> +    /* FIXME: Cover the check under a lock ? */
>> +    if (!guc->log.relay_chan)
>> +        return NULL;
>> +
>> +    /* Just get the base address of a new sub buffer and copy data
>> into it
>> +     * ourselves. NULL will be returned in no-overwrite mode, if all sub
>> +     * buffers are full. Could have used the relay_write() to indirectly
>> +     * copy the data, but that would have been bit convoluted, as we
>> need to
>> +     * write to only certain locations inside a sub buffer which
>> cannot be
>> +     * done without using relay_reserve() along with relay_write().
>> So its
>> +     * better to use relay_reserve() alone.
>> +     */
>> +    return relay_reserve(guc->log.relay_chan, 0);
>>   }
>>
>>   static void guc_read_update_log_buffer(struct intel_guc *guc)
>> @@ -929,6 +1077,12 @@ static void guc_read_update_log_buffer(struct
>> intel_guc *guc)
>>
>>       if (log_buffer_snapshot_state)
>>           guc_move_to_next_buf(guc);
>> +    else {
>> +        /* Used rate limited to avoid deluge of messages, logs might be
>> +         * getting consumed by User at a slow rate.
>> +         */
>> +        DRM_ERROR_RATELIMITED("no sub-buffer to capture log buffer\n");
>> +    }
>>   }
>>
>>   static void guc_capture_logs_work(struct work_struct *work)
>> @@ -964,6 +1118,11 @@ static void guc_log_cleanup(struct intel_guc *guc)
>>
>>       guc->log.flush_wq = NULL;
>>
>> +    if (guc->log.relay_chan)
>> +        guc_remove_log_relay_file(guc);
>> +
>> +    guc->log.relay_chan = NULL;
>> +
>>       if (guc->log.buf_addr)
>>           i915_gem_object_unpin_map(guc->log.vma->obj);
>>
>> @@ -1054,6 +1213,35 @@ static void guc_create_log(struct intel_guc *guc)
>>       guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
>>   }
>>
>> +static int guc_log_late_setup(struct intel_guc *guc)
>> +{
>> +    struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +    int ret;
>> +
>> +    lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> +    if (i915.guc_log_level < 0)
>> +        return -EINVAL;
>> +
>> +    /* If log_level was set as -1 at boot time, then vmalloc mapping
>> would
>> +     * not have been created for the log buffer, so create one now.
>> +     */
>> +    ret = guc_create_log_extras(guc);
>> +    if (ret)
>> +        goto err;
>> +
>> +    ret = guc_create_log_relay_file(guc);
>> +    if (ret)
>> +        goto err;
>> +
>> +    return 0;
>> +err:
>> +    guc_log_cleanup(guc);
>> +    /* logging will remain off */
>> +    i915.guc_log_level = -1;
>> +    return ret;
>> +}
>> +
>>   static void init_guc_policies(struct guc_policies *policies)
>>   {
>>       struct guc_policy *policy;
>> @@ -1223,7 +1411,6 @@ void i915_guc_submission_fini(struct
>> drm_i915_private *dev_priv)
>>       struct intel_guc *guc = &dev_priv->guc;
>>
>>       i915_vma_unpin_and_release(&guc->ads_vma);
>> -    guc_log_cleanup(guc);
>>       i915_vma_unpin_and_release(&guc->log.vma);
>>
>>       if (guc->ctx_pool_vma)
>> @@ -1297,3 +1484,23 @@ void i915_guc_capture_logs(struct
>> drm_i915_private *dev_priv)
>>       host2guc_logbuffer_flush_complete(&dev_priv->guc);
>>       intel_runtime_pm_put(dev_priv);
>>   }
>> +
>> +void i915_guc_unregister(struct drm_i915_private *dev_priv)
>> +{
>> +    if (!i915.enable_guc_submission)
>> +        return;
>> +
>> +    mutex_lock(&dev_priv->drm.struct_mutex);
>> +    guc_log_cleanup(&dev_priv->guc);
>> +    mutex_unlock(&dev_priv->drm.struct_mutex);
>> +}
>> +
>> +void i915_guc_register(struct drm_i915_private *dev_priv)
>> +{
>> +    if (!i915.enable_guc_submission)
>> +        return;
>> +
>> +    mutex_lock(&dev_priv->drm.struct_mutex);
>> +    guc_log_late_setup(&dev_priv->guc);
>> +    mutex_unlock(&dev_priv->drm.struct_mutex);
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index d053a18..3299cce 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -127,6 +127,7 @@ struct intel_guc_log {
>>       void *buf_addr;
>>       struct workqueue_struct *flush_wq;
>>       struct work_struct flush_work;
>> +    struct rchan *relay_chan;
>>   };
>>
>>   struct intel_guc {
>> @@ -171,5 +172,7 @@ int i915_guc_wq_check_space(struct
>> drm_i915_gem_request *rq);
>>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
>>   void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
>> +void i915_guc_register(struct drm_i915_private *dev_priv);
>> +void i915_guc_unregister(struct drm_i915_private *dev_priv);
>>
>>   #endif
>>
>
> The rest looks fine to me, with a disclaimer that I don't know relayfs.
>
> Regards,
>
> Tvrtko


More information about the Intel-gfx mailing list