[Intel-gfx] [PATCH 08/18] drm/i915: Add a relay backed debugfs interface for capturing GuC logs

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Aug 15 15:29:50 UTC 2016


On 15/08/16 15:49, akash.goel at intel.com wrote:
> From: Akash Goel <akash.goel at intel.com>
>
> Added a new debugfs interface '/sys/kernel/debug/dri/0/guc_log' for the
> User to capture GuC firmware logs. The relay framework is used to implement
> the interface, so the Driver just has to use a relay API to store
> snapshots of the GuC log buffer in the buffer managed by relay.
> The snapshot will be taken when GuC firmware sends a log buffer flush
> interrupt and up to eight snapshots could be stored in the relay buffer.
> The relay buffer will be operated in no-overwrite mode, where relay stops
> accepting new data if there are no empty sub buffers left.
> Besides mmap method, through which User can directly access the relay
> buffer contents, relay also supports the 'poll' method. Through the 'poll'
> call on log file, User can come to know whenever a new snapshot of the
> log buffer is taken by Driver, so can run in tandem with the Driver and
> capture the logs in a sustained/streaming manner, without any loss of data.
>
> v2: Defer the creation of relay channel & associated debugfs file, as
>      debugfs setup is now done at the end of i915 Driver load. (Chris)
>
> v3:
> - Switch to no-overwrite mode for relay.
> - Fix the relay sub buffer switching sequence.
>
> v4:
> - Update i915 Kconfig to select RELAY config. (Tvrtko)
> - Log a message when there is no sub buffer available to capture
>    the GuC log buffer. (Tvrtko)
> - Increase the number of relay sub buffers to 8 from 4, to have
>    sufficient buffering for boot time logs
>
> v5:
> - Fix the alignment, indentation issues and some minor cleanup. (Tvrtko)
> - Update the comment to elaborate on why a relay channel has to be
>    associated with the debugfs file. (Tvrtko)
>
> Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
> Signed-off-by: Akash Goel <akash.goel at intel.com>
> ---
>   drivers/gpu/drm/i915/Kconfig               |   1 +
>   drivers/gpu/drm/i915/i915_drv.c            |   2 +
>   drivers/gpu/drm/i915/i915_guc_submission.c | 211 ++++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/intel_guc.h           |   3 +
>   4 files changed, 215 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7769e46..fc900d2 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -11,6 +11,7 @@ config DRM_I915
>   	select DRM_KMS_HELPER
>   	select DRM_PANEL
>   	select DRM_MIPI_DSI
> +	select RELAY
>   	# i915 depends on ACPI_VIDEO when ACPI is enabled
>   	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
>   	select BACKLIGHT_LCD_SUPPORT if ACPI
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 13ae340..cdee60b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1133,6 +1133,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
>   	/* Reveal our presence to userspace */
>   	if (drm_dev_register(dev, 0) == 0) {
>   		i915_debugfs_register(dev_priv);
> +		i915_guc_register(dev_priv);
>   		i915_setup_sysfs(dev);
>   	} else
>   		DRM_ERROR("Failed to register driver for userspace access!\n");
> @@ -1171,6 +1172,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
>   	intel_opregion_unregister(dev_priv);
>
>   	i915_teardown_sysfs(&dev_priv->drm);
> +	i915_guc_unregister(dev_priv);
>   	i915_debugfs_unregister(dev_priv);
>   	drm_dev_unregister(&dev_priv->drm);
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 2b27b87..9b1054c 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -23,6 +23,8 @@
>    */
>   #include <linux/firmware.h>
>   #include <linux/circ_buf.h>
> +#include <linux/debugfs.h>
> +#include <linux/relay.h>
>   #include "i915_drv.h"
>   #include "intel_guc.h"
>
> @@ -837,13 +839,159 @@ err:
>   	return NULL;
>   }
>
> +/*
> + * Sub buffer switch callback. Called whenever relay has to switch to a new
> + * sub buffer, relay stays on the same sub buffer if 0 is returned.
> + */
> +static int subbuf_start_callback(struct rchan_buf *buf,
> +				 void *subbuf,
> +				 void *prev_subbuf,
> +				 size_t prev_padding)
> +{
> +	/* Use no-overwrite mode by default, where relay will stop accepting
> +	 * new data if there are no empty sub buffers left.
> +	 * There is no strict synchronization enforced by relay between Consumer
> +	 * and Producer. In overwrite mode, there is a possibility of getting
> +	 * inconsistent/garbled data, the producer could be writing on to the
> +	 * same sub buffer from which Consumer is reading. This can't be avoided
> +	 * unless Consumer is fast enough and can always run in tandem with
> +	 * Producer.
> +	 */
> +	if (relay_buf_full(buf))
> +		return 0;
> +
> +	return 1;
> +}
> +
> +/*
> + * file_create() callback. Creates relay file in debugfs.
> + */
> +static struct dentry *create_buf_file_callback(const char *filename,
> +					       struct dentry *parent,
> +					       umode_t mode,
> +					       struct rchan_buf *buf,
> +					       int *is_global)
> +{
> +	struct dentry *buf_file;
> +
> +	/* This is to enable the use of a single buffer for the relay channel and
> +	 * correspondingly have a single file exposed to User, through which
> +	 * it can collect the logs in order without any post-processing.
> +	 */
> +	*is_global = 1;
> +
> +	if (!parent)
> +		return NULL;

Should the write to *is_global come after this NULL check on parent?

> +
> +	/* Not using the channel filename passed as an argument, since for each
> +	 * channel relay appends the corresponding CPU number to the filename
> +	 * passed in relay_open(). This should be fine as relay just needs a
> +	 * dentry of the file associated with the channel buffer and that file's
> +	 * name need not be same as the filename passed as an argument.
> +	 */
> +	buf_file = debugfs_create_file("guc_log", mode,
> +				       parent, buf, &relay_file_operations);
> +	return buf_file;
> +}
> +
> +/*
> + * file_remove() default callback. Removes relay file in debugfs.
> + */
> +static int remove_buf_file_callback(struct dentry *dentry)
> +{
> +	debugfs_remove(dentry);
> +	return 0;
> +}
> +
> +/* relay channel callbacks */
> +static struct rchan_callbacks relay_callbacks = {
> +	.subbuf_start = subbuf_start_callback,
> +	.create_buf_file = create_buf_file_callback,
> +	.remove_buf_file = remove_buf_file_callback,
> +};
> +
> +static void guc_remove_log_relay_file(struct intel_guc *guc)
> +{
> +	relay_close(guc->log.relay_chan);
> +}
> +
> +static int guc_create_log_relay_file(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct rchan *guc_log_relay_chan;
> +	struct dentry *log_dir;
> +	size_t n_subbufs, subbuf_size;
> +
> +	/* For now create the log file in /sys/kernel/debug/dri/0 dir */
> +	log_dir = dev_priv->drm.primary->debugfs_root;
> +
> +	/* If /sys/kernel/debug/dri/0 location does not exist, then debugfs is
> +	 * not mounted and so can't create the relay file.
> +	 * The relay API seems to fit well with debugfs only, for availing relay
> +	 * there are 3 requirements which can be met for debugfs file only in a
> +	 * straightforward/clean manner :-
> +	 * i)   Need the associated dentry pointer of the file, while opening the
> +	 *      relay channel.
> +	 * ii)  Should be able to use 'relay_file_operations' fops for the file.
> +	 * iii) Set the 'i_private' field of file's inode to the pointer of
> +	 *	relay channel buffer.
> +	 */
> +	if (!log_dir) {
> +		DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
> +		return -ENODEV;
> +	}
> +
> +	/* Keep the size of sub buffers same as shared log buffer */
> +	subbuf_size = guc->log.vma->obj->base.size;
> +
> +	/* Store up to 8 snapshots, which is large enough to buffer sufficient
> +	 * boot time logs and provides enough leeway to User, in terms of
> +	 * latency, for consuming the logs from relay. Also doesn't take
> +	 * up too much memory.
> +	 */
> +	n_subbufs = 8;
> +
> +	guc_log_relay_chan = relay_open("guc_log", log_dir, subbuf_size,
> +					n_subbufs, &relay_callbacks, dev_priv);
> +	if (!guc_log_relay_chan) {
> +		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
> +		return -ENOMEM;
> +	}
> +
> +	/* FIXME: Cover the update under a lock ? */
> +	guc->log.relay_chan = guc_log_relay_chan;
> +	return 0;
> +}
> +
>   static void guc_move_to_next_buf(struct intel_guc *guc)
>   {
> +	/* Make sure the updates made in the sub buffer are visible when
> +	 * Consumer sees the following update to offset inside the sub buffer.
> +	 */
> +	smp_wmb();
> +
> +	/* All data has been written, so now move the offset of sub buffer. */
> +	relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size);
> +
> +	/* Switch to the next sub buffer */
> +	relay_flush(guc->log.relay_chan);
>   }
>
>   static void *guc_get_write_buffer(struct intel_guc *guc)
>   {
> -	return NULL;
> +	/* FIXME: Cover the check under a lock ? */
> +	if (!guc->log.relay_chan)
> +		return NULL;
> +
> +	/* Just get the base address of a new sub buffer and copy data into it
> +	 * ourselves. NULL will be returned in no-overwrite mode, if all sub
> +	 * buffers are full. Could have used the relay_write() to indirectly
> +	 * copy the data, but that would have been a bit convoluted, as we need to
> +	 * write to only certain locations inside a sub buffer which cannot be
> +	 * done without using relay_reserve() along with relay_write(). So it's
> +	 * better to use relay_reserve() alone.
> +	 */
> +	return relay_reserve(guc->log.relay_chan, 0);
>   }
>
>   static void guc_read_update_log_buffer(struct intel_guc *guc)
> @@ -929,6 +1077,12 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>
>   	if (log_buffer_snapshot_state)
>   		guc_move_to_next_buf(guc);
> +	else {
> +		/* Use the rate limited variant to avoid a deluge of messages, as
> +		 * logs might be getting consumed by User at a slow rate.
> +		 */
> +		DRM_ERROR_RATELIMITED("no sub-buffer to capture log buffer\n");
> +	}
>   }
>
>   static void guc_capture_logs_work(struct work_struct *work)
> @@ -964,6 +1118,11 @@ static void guc_log_cleanup(struct intel_guc *guc)
>
>   	guc->log.flush_wq = NULL;
>
> +	if (guc->log.relay_chan)
> +		guc_remove_log_relay_file(guc);
> +
> +	guc->log.relay_chan = NULL;
> +
>   	if (guc->log.buf_addr)
>   		i915_gem_object_unpin_map(guc->log.vma->obj);
>
> @@ -1054,6 +1213,35 @@ static void guc_create_log(struct intel_guc *guc)
>   	guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
>   }
>
> +static int guc_log_late_setup(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	int ret;
> +
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> +	if (i915.guc_log_level < 0)
> +		return -EINVAL;
> +
> +	/* If log_level was set as -1 at boot time, then vmalloc mapping would
> +	 * not have been created for the log buffer, so create one now.
> +	 */
> +	ret = guc_create_log_extras(guc);
> +	if (ret)
> +		goto err;
> +
> +	ret = guc_create_log_relay_file(guc);
> +	if (ret)
> +		goto err;
> +
> +	return 0;
> +err:
> +	guc_log_cleanup(guc);
> +	/* logging will remain off */
> +	i915.guc_log_level = -1;
> +	return ret;
> +}
> +
>   static void init_guc_policies(struct guc_policies *policies)
>   {
>   	struct guc_policy *policy;
> @@ -1223,7 +1411,6 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>   	struct intel_guc *guc = &dev_priv->guc;
>
>   	i915_vma_unpin_and_release(&guc->ads_vma);
> -	guc_log_cleanup(guc);
>   	i915_vma_unpin_and_release(&guc->log.vma);
>
>   	if (guc->ctx_pool_vma)
> @@ -1297,3 +1484,23 @@ void i915_guc_capture_logs(struct drm_i915_private *dev_priv)
>   	host2guc_logbuffer_flush_complete(&dev_priv->guc);
>   	intel_runtime_pm_put(dev_priv);
>   }
> +
> +void i915_guc_unregister(struct drm_i915_private *dev_priv)
> +{
> +	if (!i915.enable_guc_submission)
> +		return;
> +
> +	mutex_lock(&dev_priv->drm.struct_mutex);
> +	guc_log_cleanup(&dev_priv->guc);
> +	mutex_unlock(&dev_priv->drm.struct_mutex);
> +}
> +
> +void i915_guc_register(struct drm_i915_private *dev_priv)
> +{
> +	if (!i915.enable_guc_submission)
> +		return;
> +
> +	mutex_lock(&dev_priv->drm.struct_mutex);
> +	guc_log_late_setup(&dev_priv->guc);
> +	mutex_unlock(&dev_priv->drm.struct_mutex);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index d053a18..3299cce 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -127,6 +127,7 @@ struct intel_guc_log {
>   	void *buf_addr;
>   	struct workqueue_struct *flush_wq;
>   	struct work_struct flush_work;
> +	struct rchan *relay_chan;
>   };
>
>   struct intel_guc {
> @@ -171,5 +172,7 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
>   void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
> +void i915_guc_register(struct drm_i915_private *dev_priv);
> +void i915_guc_unregister(struct drm_i915_private *dev_priv);
>
>   #endif
>

The rest looks fine to me, with a disclaimer that I don't know relayfs.

Regards,

Tvrtko


More information about the Intel-gfx mailing list