[Intel-gfx] [PATCH 07/17] drm/i915: Add a relay backed debugfs interface for capturing GuC logs
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Jul 19 11:31:37 UTC 2016
On 10/07/16 14:41, akash.goel at intel.com wrote:
> From: Akash Goel <akash.goel at intel.com>
>
> Added a new debugfs interface '/sys/kernel/debug/dri/guc_log' for the
> User to capture GuC firmware logs. Availed relay framework to implement
> the interface, where Driver will have to just use a relay API to store
> snapshots of the GuC log buffer in the buffer managed by relay.
> The snapshot will be taken when GuC firmware sends a log buffer flush
> interrupt and up to four snaphots could be stored in the relay buffer.
> The relay buffer will be operated in a mode where it will overwrite the
> data not yet collected by User.
> Besides mmap method, through which User can directly access the relay
> buffer contents, relay also supports the 'poll' method. Through the 'poll'
> call on log file, User can come to know whenever a new snapshot of the
> log buffer is taken by Driver, so can run in tandem with the Driver and
> capture the logs in a sustained/streaming manner, without any loss of data.
>
> v2: Defer the creation of relay channel & associated debugfs file, as
> debugfs setup is now done at the end of i915 Driver load. (Chris)
>
> v3:
> - Switch to no-overwrite mode for relay.
> - Fix the relay sub buffer switching sequence.
>
> Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
> Signed-off-by: Akash Goel <akash.goel at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 2 +
> drivers/gpu/drm/i915/i915_guc_submission.c | 197 ++++++++++++++++++++++++++++-
> drivers/gpu/drm/i915/intel_guc.h | 3 +
> 3 files changed, 199 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 25c6b9b..43c9900 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1177,6 +1177,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
> /* Reveal our presence to userspace */
> if (drm_dev_register(dev, 0) == 0) {
> i915_debugfs_register(dev_priv);
> + i915_guc_register(dev);
> i915_setup_sysfs(dev);
> } else
> DRM_ERROR("Failed to register driver for userspace access!\n");
> @@ -1215,6 +1216,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
> intel_opregion_unregister(dev_priv);
>
> i915_teardown_sysfs(&dev_priv->drm);
> + i915_guc_unregister(&dev_priv->drm);
> i915_debugfs_unregister(dev_priv);
> drm_dev_unregister(&dev_priv->drm);
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index d3dbb8e..9b436fa 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -23,6 +23,8 @@
> */
> #include <linux/firmware.h>
> #include <linux/circ_buf.h>
> +#include <linux/debugfs.h>
> +#include <linux/relay.h>
> #include "i915_drv.h"
> #include "intel_guc.h"
>
> @@ -836,12 +838,33 @@ err:
>
> static void guc_move_to_next_buf(struct intel_guc *guc)
> {
> - return;
> + /* Make sure our updates are in the sub buffer are visible when
> + * Consumer sees a newly produced sub buffer.
> + */
> + smp_wmb();
> +
> + /* All data has been written, so now move the offset of sub buffer. */
> + relay_reserve(guc->log.relay_chan, guc->log.obj->base.size);
> +
> + /* Switch to the next sub buffer */
> + relay_flush(guc->log.relay_chan);
> }
>
> static void* guc_get_write_buffer(struct intel_guc *guc)
> {
> - return NULL;
> + /* FIXME: Cover the check under a lock ? */
> + if (!guc->log.relay_chan)
> + return NULL;
> +
> + /* Just get the base address of a new sub buffer and copy data into it
> + * ourselves. NULL will be returned in no-overwrite mode, if all sub
> + * buffers are full. Could have used the relay_write() to indirectly
> + * copy the data, but that would have been bit convoluted, as we need to
> + * write to only certain locations inside a sub buffer which cannot be
> + * done without using relay_reserve() along with relay_write(). So its
> + * better to use relay_reserve() alone.
> + */
> + return relay_reserve(guc->log.relay_chan, 0);
> }
>
> static void guc_read_update_log_buffer(struct drm_device *dev)
> @@ -906,6 +929,119 @@ static void guc_read_update_log_buffer(struct drm_device *dev)
> guc_move_to_next_buf(guc);
> }
>
> +/*
> + * Sub buffer switch callback. Called whenever relay has to switch to a new
> + * sub buffer, relay stays on the same sub buffer if 0 is returned.
> + */
> +static int subbuf_start_callback(struct rchan_buf *buf,
> + void *subbuf,
> + void *prev_subbuf,
> + size_t prev_padding)
> +{
> + /* Use no-overwrite mode by default, where relay will stop accepting
> + * new data if there are no empty sub buffers left.
> + * There is no strict synchronization enforced by relay between Consumer
> + * and Producer. In overwrite mode, there is a possibility of getting
> + * inconsistent/garbled data, the producer could be writing on to the
> + * same sub buffer from which Consumer is reading. This can't be avoided
> + * unless Consumer is fast enough and can always run in tandem with
> + * Producer.
> + */
> + if (relay_buf_full(buf))
> + return 0;
> +
> + return 1;
> +}
> +
> +/*
> + * file_create() callback. Creates relay file in debugfs.
> + */
> +static struct dentry *create_buf_file_callback(const char *filename,
> + struct dentry *parent,
> + umode_t mode,
> + struct rchan_buf *buf,
> + int *is_global)
> +{
> + /*
> + * Not using the channel filename passed as an argument, since for each
> + * channel relay appends the corresponding CPU number to the filename
> + * passed in relay_open(). This should be fine as relay just needs a
> + * dentry of the file associated with the channel buffer and that file's
> + * name need not be same as the filename passed as an argument.
> + */
> + struct dentry *buf_file = debugfs_create_file("guc_log", mode,
> + parent, buf, &relay_file_operations);
> +
> + /* This to enable the use of a single buffer for the relay channel and
> + * correspondingly have a single file exposed to User, through which
> + * it can collect the logs inorder without any post-processing.
> + */
> + *is_global = 1;
> +
> + return buf_file;
> +}
> +
> +/*
> + * file_remove() default callback. Removes relay file in debugfs.
> + */
> +static int remove_buf_file_callback(struct dentry *dentry)
> +{
> + debugfs_remove(dentry);
> + return 0;
> +}
> +
> +/* relay channel callbacks */
> +static struct rchan_callbacks relay_callbacks = {
> + .subbuf_start = subbuf_start_callback,
> + .create_buf_file = create_buf_file_callback,
> + .remove_buf_file = remove_buf_file_callback,
> +};
> +
> +static void guc_remove_log_relay_file(struct intel_guc *guc)
> +{
> + if (guc->log.relay_chan)
> + relay_close(guc->log.relay_chan);
> +}
> +
> +static int guc_create_log_relay_file(struct intel_guc *guc)
> +{
> + struct drm_i915_private *dev_priv = guc_to_i915(guc);
> + struct drm_device *dev = &dev_priv->drm;
> + struct dentry *log_dir;
> + struct rchan *guc_log_relay_chan;
> + size_t n_subbufs, subbuf_size;
> +
> + if (guc->log.relay_chan)
> + return 0;
> +
> + /* If /sys/kernel/debug/dri/0 location do not exist, then debugfs is
> + * not mounted and so can't create the relay file.
> + * The relay API seems to fit well with debugfs only.
> + */
> + if (!dev->primary->debugfs_root)
> + return -ENODEV;
> +
> + /* For now create the log file in /sys/kernel/debug/dri/0 dir */
> + log_dir = dev->primary->debugfs_root;
> +
> + /* Keep the size of sub buffers same as shared log buffer */
> + subbuf_size = guc->log.obj->base.size;
> + /* TODO: Decide based on the User's input */
> + n_subbufs = 4;
> +
> + guc_log_relay_chan = relay_open("guc_log", log_dir,
> + subbuf_size, n_subbufs, &relay_callbacks, dev);
> +
> + if (!guc_log_relay_chan) {
> + DRM_DEBUG_DRIVER("Couldn't create relay chan for guc logs\n");
> + return -ENOMEM;
> + }
> +
> + /* FIXME: Cover the update under a lock ? */
> + guc->log.relay_chan = guc_log_relay_chan;
> + return 0;
> +}
> +
> static void guc_log_cleanup(struct drm_i915_private *dev_priv)
> {
> struct intel_guc *guc = &dev_priv->guc;
> @@ -918,6 +1054,9 @@ static void guc_log_cleanup(struct drm_i915_private *dev_priv)
> /* First disable the flush interrupt */
> gen9_disable_guc_interrupts(dev_priv);
>
> + guc_remove_log_relay_file(guc);
> + guc->log.relay_chan = NULL;
> +
> if (guc->log.buf_addr)
> i915_gem_object_unpin_map(guc->log.obj);
>
> @@ -996,6 +1135,39 @@ static void guc_create_log(struct intel_guc *guc)
> guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
> }
>
> +static int guc_log_late_setup(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_guc *guc = &dev_priv->guc;
> + int ret;
> +
> + lockdep_assert_held(&dev->struct_mutex);
> +
> + if (i915.guc_log_level < 0)
> + return -EINVAL;
> +
> + if (WARN_ON(guc->log.relay_chan))
> + return -EINVAL;
> +
> + /* If log_level was set as -1 at boot time, then vmalloc mapping would
> + * not have been created for the log buffer, so create one now.
> + */
> + ret = guc_create_log_extras(guc);
> + if (ret)
> + goto err;
> +
> + ret = guc_create_log_relay_file(guc);
> + if (ret)
> + goto err;
> +
> + return 0;
> +err:
> + guc_log_cleanup(dev_priv);
> + /* logging will remain off */
> + i915.guc_log_level = -1;
> + return ret;
> +}
> +
> static void init_guc_policies(struct guc_policies *policies)
> {
> struct guc_policy *policy;
> @@ -1154,7 +1326,6 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
> gem_release_guc_obj(dev_priv->guc.ads_obj);
> guc->ads_obj = NULL;
>
> - guc_log_cleanup(dev_priv);
> gem_release_guc_obj(dev_priv->guc.log.obj);
> guc->log.obj = NULL;
>
> @@ -1232,3 +1403,23 @@ void i915_guc_capture_logs(struct drm_device *dev)
> host2guc_logbuffer_flush_complete(&dev_priv->guc);
> intel_runtime_pm_put(dev_priv);
> }
> +
> +void i915_guc_unregister(struct drm_device *dev)
> +{
> + if (!i915.enable_guc_submission)
> + return;
> +
> + mutex_lock(&dev->struct_mutex);
> + guc_log_cleanup(dev->dev_private);
> + mutex_unlock(&dev->struct_mutex);
> +}
> +
> +void i915_guc_register(struct drm_device *dev)
> +{
> + if (!i915.enable_guc_submission)
> + return;
> +
> + mutex_lock(&dev->struct_mutex);
> + guc_log_late_setup(dev);
> + mutex_unlock(&dev->struct_mutex);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index d4f0fae..a784cf8 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -127,6 +127,7 @@ struct intel_guc_log {
> struct drm_i915_gem_object *obj;
> struct workqueue_struct *wq;
> void *buf_addr;
> + struct rchan *relay_chan;
> };
>
> struct intel_guc {
> @@ -174,5 +175,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq);
> void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
> void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
> void i915_guc_capture_logs(struct drm_device *dev);
> +void i915_guc_register(struct drm_device *dev);
> +void i915_guc_unregister(struct drm_device *dev);
>
> #endif
>
I am clueless about relayfs - do we have someone in house who could
provide a better review of that?
In the mean time, you got TODO and FIXME's in the code so that needs to
be resolved before a full review anyway. :)
Regards,
Tvrtko
More information about the Intel-gfx
mailing list