[PATCH 10/21] drm/xe/eudebug: Introduce per device attention scan worker

Grzegorzek, Dominik dominik.grzegorzek at intel.com
Mon Jul 29 10:10:24 UTC 2024


On Sat, 2024-07-27 at 05:08 +0000, Matthew Brost wrote:
> On Fri, Jul 26, 2024 at 05:08:07PM +0300, Mika Kuoppala wrote:
> > From: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> > 
> > Scan for EU debugging attention bits periodically to detect if some EU
> > thread has entered the system routine (SIP) due to EU thread exception.
> > 
> > Make the scanning interval 10 times slower when there is no debugger
> > connection open. Send attention event whenever we see attention with
> > debugger presence. If there is no debugger connection active - reset.
> > 
> > Based on work by authors and other folks who were part of attentions in
> > i915.
> > 
> > - v2 Do not validate potentially active hwe against engine->hwe.
> >   Whenever the engine has width > 1, this field contains only the first
> >   hwe of the class.
> > - squash dss walking and semaphore to mutex
> > - v3 error path fix in xe_send_gt_attention (Christoph)
> > - v4 runalone active fix (Mika)
> > - v5 q->lrc changes (Mika)
> > - v6 Use C99 flexible arrays (Maciej, checkpatch)
> >      function with 'for_each' in name (Maciej, checkpatch)
> > - v7 long running active fix (Dominik)
> > - v8 resource handling errors rebase (Mika)
> > - v9 find out lrc handles first before sending event (Mika)
> > - v10 adjust runalone shift according to hw
> > 
> > Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> > Signed-off-by: Christoph Manszewski <christoph.manszewski at intel.com>
> > Signed-off-by: Maciej Patelczyk <maciej.patelczyk at intel.com>
> > Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> > ---
> >  drivers/gpu/drm/xe/Makefile              |   1 +
> >  drivers/gpu/drm/xe/regs/xe_engine_regs.h |   3 +
> >  drivers/gpu/drm/xe/regs/xe_gt_regs.h     |   7 +
> >  drivers/gpu/drm/xe/xe_device.c           |   2 +
> >  drivers/gpu/drm/xe/xe_device_types.h     |   3 +
> >  drivers/gpu/drm/xe/xe_eudebug.c          | 389 ++++++++++++++++++++++-
> >  drivers/gpu/drm/xe/xe_eudebug.h          |   1 +
> >  drivers/gpu/drm/xe/xe_eudebug_types.h    |  32 ++
> >  drivers/gpu/drm/xe/xe_gt_debug.c         | 152 +++++++++
> >  drivers/gpu/drm/xe/xe_gt_debug.h         |  21 ++
> >  include/uapi/drm/xe_drm_eudebug.h        |  15 +-
> >  11 files changed, 624 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/gpu/drm/xe/xe_gt_debug.c
> >  create mode 100644 drivers/gpu/drm/xe/xe_gt_debug.h
> > 
> > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > index 06badc5f99af..b7b6b047c02c 100644
> > --- a/drivers/gpu/drm/xe/Makefile
> > +++ b/drivers/gpu/drm/xe/Makefile
> > @@ -49,6 +49,7 @@ xe-y += xe_bb.o \
> >  	xe_gt_debugfs.o \
> >  	xe_gt_freq.o \
> >  	xe_gt_idle.o \
> > +	xe_gt_debug.o \
> >  	xe_gt_mcr.o \
> >  	xe_gt_pagefault.o \
> >  	xe_gt_sysfs.o \
> > diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > index 764c270599d0..b9d713a2061d 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > @@ -132,6 +132,9 @@
> >  #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
> >  #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
> >  
> > +#define RING_CURRENT_LRCA(base)			XE_REG((base) + 0x240)
> > +#define   CURRENT_LRCA_VALID			REG_BIT(0)
> > +
> >  #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
> >  #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
> >  #define	  CTX_CTRL_RUN_ALONE			REG_BIT(7)
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > index 96a59a96dd4c..03e83ce3e35d 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > @@ -437,6 +437,8 @@
> >  #define   DISABLE_ECC				REG_BIT(5)
> >  #define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
> >  
> > +#define TD_ATT(x)				XE_REG_MCR(0xe470 + (x) * 4)
> > +
> >  #define ROW_CHICKEN4				XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED)
> >  #define   DISABLE_GRF_CLEAR			REG_BIT(13)
> >  #define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
> > @@ -516,6 +518,11 @@
> >  #define   CCS_MODE_CSLICE(cslice, ccs) \
> >  	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
> >  
> > +#define RCU_DEBUG_1				XE_REG(0x14a00)
> > +#define   RCU_DEBUG_1_ENGINE_STATUS		REG_GENMASK(2, 0)
> > +#define   RCU_DEBUG_1_RUNALONE_ACTIVE		REG_BIT(2)
> > +#define   RCU_DEBUG_1_CONTEXT_ACTIVE		REG_BIT(0)
> > +
> >  #define FORCEWAKE_ACK_GT			XE_REG(0x130044)
> >  
> >  /* Applicable for all FORCEWAKE_DOMAIN and FORCEWAKE_ACK_DOMAIN regs */
> > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> > index 90bb0a8b1881..ba1c80089906 100644
> > --- a/drivers/gpu/drm/xe/xe_device.c
> > +++ b/drivers/gpu/drm/xe/xe_device.c
> > @@ -768,6 +768,8 @@ int xe_device_probe(struct xe_device *xe)
> >  
> >  	xe_debugfs_register(xe);
> >  
> > +	xe_eudebug_init_late(xe);
> > +
> >  	xe_hwmon_register(xe);
> >  
> >  	for_each_gt(gt, xe, id)
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > index 4dcfd39cb909..3b33add576be 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -516,6 +516,9 @@ struct xe_device {
> >  
> >  		/** @ordered_wq: used to discovery */
> >  		struct workqueue_struct *ordered_wq;
> > +
> > +		/** @attention_scan: attention scan worker */
> > +		struct delayed_work attention_scan;
> >  	} eudebug;
> >  
> >  	/* private: */
> > diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
> > index 9611acedeee9..c2de001cc33a 100644
> > --- a/drivers/gpu/drm/xe/xe_eudebug.c
> > +++ b/drivers/gpu/drm/xe/xe_eudebug.c
> > @@ -11,19 +11,29 @@
> >  
> >  #include <drm/drm_managed.h>
> >  
> > -#include "regs/xe_gt_regs.h"
> >  #include "regs/xe_engine_regs.h"
> > +#include "regs/xe_gt_regs.h"
> >  #include "xe_device.h"
> >  #include "xe_assert.h"
> >  #include "xe_macros.h"
> >  #include "xe_gt.h"
> > +#include "xe_gt_debug.h"
> > +#include "xe_lrc.h"
> > +#include "xe_hw_engine.h"
> > +#include "xe_exec_queue.h"
> >  #include "xe_eudebug_types.h"
> >  #include "xe_eudebug.h"
> >  #include "xe_exec_queue_types.h"
> > +#include "xe_guc_exec_queue_types.h"
> > +#include "xe_execlist_types.h"
> > +#include "xe_mmio.h"
> >  #include "xe_module.h"
> > +#include "xe_pm.h"
> >  #include "xe_rtp.h"
> > +#include "xe_sched_job.h"
> >  #include "xe_vm.h"
> >  #include "xe_wa.h"
> > +#include "xe_force_wake.h"
> >  
> >  /*
> >   * If there is no detected event read by userspace, during this period, assume
> > @@ -843,6 +853,371 @@ static const struct file_operations fops = {
> >  	.unlocked_ioctl	= xe_eudebug_ioctl,
> >  };
> >  
> > +static bool queue_has_active_job(struct xe_exec_queue *q)
> 
> This should probably be a generic function in xe_exec_queue.c. We seemly
> already have one - xe_exec_queue_is_idle.
> 
> > +{
> > +
> > +	struct drm_gpu_scheduler *sched;
> > +	struct drm_sched_job *drm_job;
> > +
> > +	if (xe_device_uc_enabled(gt_to_xe(q->gt)))
> 
> General comment, we likely don't want to the EU debugger enabled unless
> GuC submission is enabled as the execlist backend really doesn't work. 
> 
> > +		sched = &q->guc->sched.base;
> > +	else
> > +		sched = &q->execlist->sched;
> > +
> > +	drm_job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list);
> 
> Random musing that don't apply as we have xe_exec_queue_is_idle but...
> 
> You need a scheduler lock here which is missing. If you wanted to see
> the scheduler pending list was not empty, we'd call into the drm
> scheduler layer.
> 
> That being said, I think the EU debugger only support LR VMs? If so,
> then this always going to be empty.
Up until now we have not required the VM to be in LR mode, which is why I
wanted to support both paths. However, we do depend on LR mode, since job
timeouts would make debugging useless in practice, so we should add this
constraint.
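As a rough, untested sketch of that constraint (exact placement still to be
decided), the runalone lookup could simply skip queues that are not
long-running, reusing the xe_exec_queue_is_lr() helper already used in
queue_has_active_job() above:

	/* EU debug depends on LR mode; ignore non-LR queues */
	if (!xe_exec_queue_is_lr(q))
		continue;

next to the existing gt/class checks in runalone_active_queue_get().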

Thanks for your comments! I will reuse xe_exec_queue_is_idle here as proposed, and I will try to
follow your hints with respect to locking and forcewake.
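Something along these lines, as an untested sketch (assuming
xe_exec_queue_is_idle() covers both the scheduler pending-list case and the
LR ring head/tail case the current helper open-codes):

	static bool queue_has_active_job(struct xe_exec_queue *q)
	{
		/* idle check replaces the open-coded scheduler/LRC peek */
		return !xe_exec_queue_is_idle(q);
	}

or possibly dropping the helper entirely and calling
!xe_exec_queue_is_idle(q) directly from runalone_active_queue_get(),
combined with the take-reference-then-drop-lock lookup you outlined above.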

Regards,
Dominik
> 
> > +
> > +	if (drm_job) {
> > +		struct xe_sched_job *job = to_xe_sched_job(drm_job);
> > +
> > +		return xe_sched_job_started(job) && !xe_sched_job_completed(job);
> > +	} else if (xe_exec_queue_is_lr(q) &&
> > +		   (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
> > +		return true;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> > +static int current_lrc(struct xe_hw_engine *hwe, u32 *lrc_hw)
> > +{
> > +	u32 lrc_reg;
> > +	int err;
> > +
> > +	err = xe_force_wake_get(gt_to_fw(hwe->gt), hwe->domain);
> > +	if (err)
> > +		return err;
> > +
> > +	lrc_reg = hw_engine_mmio_read32(hwe, RING_CURRENT_LRCA(0));
> > +
> > +	xe_force_wake_put(gt_to_fw(hwe->gt), hwe->domain);
> > +
> > +	if (!(lrc_reg & CURRENT_LRCA_VALID))
> > +		return -ENOENT;
> > +
> > +	*lrc_hw = lrc_reg & GENMASK(31, 12);
> > +
> > +	return 0;
> > +}
> > +
> > +static int match_engine_lrc(struct xe_exec_queue *q, u32 lrc_hw)
> > +{
> > +	int i;
> > +	u32 lrc_ggtt;
> > +
> > +	for (i = 0; i < q->width; i++) {
> > +		lrc_ggtt = lower_32_bits(xe_lrc_descriptor(q->lrc[i]));
> > +		lrc_ggtt &= GENMASK(31, 12);
> > +		if (lrc_ggtt == lrc_hw)
> > +			return i;
> > +	}
> > +
> > +	return -1;
> > +}
> > +
> > +static u32 engine_status(const struct xe_hw_engine * const hwe,
> > +			 u32 rcu_debug1)
> > +{
> > +	const bool xe1 = GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20;
> > +	unsigned int shift;
> > +
> > +	if (hwe->class == XE_ENGINE_CLASS_RENDER) {
> > +		shift = 7;
> > +		XE_WARN_ON(hwe->instance != 0);
> > +	} else if (hwe->class == XE_ENGINE_CLASS_COMPUTE) {
> > +		XE_WARN_ON(hwe->instance > 3);
> > +
> > +		if (xe1)
> > +			shift = 10 + (hwe->instance * 3);
> > +		else
> > +			shift = 11 + (hwe->instance * 4);
> > +	} else {
> > +		XE_WARN_ON(hwe->class);
> > +		return 0;
> > +	}
> > +
> > +	return (rcu_debug1 >> shift) & RCU_DEBUG_1_ENGINE_STATUS;
> > +}
> > +
> > +static bool engine_runalone_set(const struct xe_hw_engine * const hwe,
> > +				   u32 rcu_debug1)
> > +{
> > +	return engine_status(hwe, rcu_debug1) & RCU_DEBUG_1_RUNALONE_ACTIVE;
> > +}
> > +
> > +static bool engine_context_set(const struct xe_hw_engine * const hwe,
> > +			       u32 rcu_debug1)
> > +{
> > +	return engine_status(hwe, rcu_debug1) & RCU_DEBUG_1_CONTEXT_ACTIVE;
> > +}
> > +
> > +static bool engine_has_runalone(const struct xe_hw_engine * const hwe)
> > +{
> > +	return hwe->class == XE_ENGINE_CLASS_RENDER ||
> > +		hwe->class == XE_ENGINE_CLASS_COMPUTE;
> > +}
> > +
> > +static struct xe_hw_engine *get_runalone_active_hw_engine(struct xe_gt *gt)
> > +{
> > +	struct xe_hw_engine *hwe, *first = NULL;
> > +	unsigned int num_active, id;
> > +	u32 val;
> > +
> > +	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) {
> > +		drm_dbg(&gt_to_xe(gt)->drm, "eudbg: runalone failed to get force wake\n");
> > +		return NULL;
> > +	}
> > +
> > +	val = xe_mmio_read32(gt, RCU_DEBUG_1);
> > +	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> > +
> > +	drm_dbg(&gt_to_xe(gt)->drm, "eudbg: runalone RCU_DEBUG_1 = 0x%08x\n", val);
> > +
> > +	num_active = 0;
> > +	for_each_hw_engine(hwe, gt, id) {
> > +		bool runalone, ctx;
> > +
> > +		if (!engine_has_runalone(hwe))
> > +			continue;
> > +
> > +		runalone = engine_runalone_set(hwe, val);
> > +		ctx = engine_context_set(hwe, val);
> > +
> > +		drm_dbg(&gt_to_xe(gt)->drm, "eudbg: engine %s: runalone=%s, context=%s",
> > +			hwe->name, runalone ? "active" : "inactive",
> > +			ctx ? "active" : "inactive");
> > +
> > +		/*
> > +		 * On earlier gen12 the context status seems to be idle when
> > +		 * it has raised attention. We have to omit the active bit.
> > +		 */
> > +		if (IS_DGFX(gt_to_xe(gt)))
> > +			ctx = true;
> > +
> > +		if (runalone && ctx) {
> > +			num_active++;
> > +
> > +			drm_dbg(&gt_to_xe(gt)->drm, "eudbg: runalone engine %s %s",
> > +				hwe->name, first ? "selected" : "found");
> > +			if (!first)
> > +				first = hwe;
> > +		}
> > +	}
> > +
> > +	if (num_active > 1)
> > +		drm_err(&gt_to_xe(gt)->drm, "eudbg: %d runalone engines active!",
> > +			num_active);
> > +
> > +	return first;
> > +}
> > +
> > +static struct xe_exec_queue *runalone_active_queue_get(struct xe_gt *gt, int *lrc_idx)
> > +{
> > +	struct xe_device *xe = gt_to_xe(gt);
> > +	struct xe_exec_queue *q, *found = NULL;
> > +	struct xe_hw_engine *active;
> > +	struct xe_file *xef, *tmp;
> > +	unsigned long i;
> > +	int idx, err;
> > +	u32 lrc_hw;
> > +
> > +	active = get_runalone_active_hw_engine(gt);
> > +	if (!active) {
> > +		drm_dbg(&gt_to_xe(gt)->drm, "Runalone engine not found!");
> > +		return ERR_PTR(-ENOENT);
> > +	}
> > +
> > +	err = current_lrc(active, &lrc_hw);
> > +	if (err)
> > +		return ERR_PTR(err);
> > +
> > +	mutex_lock(&xe->files.lock);
> > +	list_for_each_entry_safe(xef, tmp, &xe->files.list, link) {
> > +		mutex_lock(&xef->exec_queue.lock);
> > +		xa_for_each(&xef->exec_queue.xa, i, q) {
> 
> Same comment here as [1]. Don't hold xe->files.lock or
> xef->exec_queue.lock for anything but the lookup. Holding locks longer
> than needed creates unwanted lock dep chains which creates all sorts of
> problems. The only time chains should exist is when we have well defined
> locking chain like we have for a VM (vm->lock -> dma-resv -> notifier
> lock).
> 
> So this should look something like this:
> 
> mutex_lock(&xe->files.lock);
> xa_for_each(.., xef) {
> 	xe_file_get(xef);
> 	mutex_unlock(&xe->files.lock);
> 
> 	mutex_lock(&xef->exec_queue.lock);
> 	xa_for_each(&xef->exec_queue.xa, i, q) {
> 		xe_exec_queue_get(q);
> 		mutex_unlock(&xef->exec_queue.lock);
> 
> 		/* Do something */
> 	
> 		xe_exec_queue_put(q);
> 		mutex_lock(&xef->exec_queue.lock);
> 	}
> 	mutex_unlock(&xef->exec_queue.lock);
> 	
> 	xe_file_put(xef);
> 	mutex_lock(&xe->files.lock);
> }
> mutex_unlock(&xe->files.lock);
> 
> Note you will need to change xe->files.list to an xarray to make this
> safe too.
> 
> Matt
> 
> [1] https://patchwork.freedesktop.org/patch/606052/?series=136572&rev=1
> 
> > +			if (q->gt != gt)
> > +				continue;
> > +
> > +			if (q->class != active->class)
> > +				continue;
> > +
> > +			if (!queue_has_active_job(q))
> > +				continue;
> > +
> > +			idx = match_engine_lrc(q, lrc_hw);
> > +			if (idx < 0)
> > +				continue;
> > +
> > +			xe_exec_queue_get(q);
> > +			found = q;
> > +
> > +			if (lrc_idx)
> > +				*lrc_idx = idx;
> > +
> > +			break;
> > +		}
> > +		mutex_unlock(&xef->exec_queue.lock);
> > +
> > +		if (found)
> > +			break;
> > +	}
> > +	mutex_unlock(&xe->files.lock);
> > +
> > +	if (!found)
> > +		return ERR_PTR(-ENOENT);
> > +
> > +	if (XE_WARN_ON(current_lrc(active, &lrc_hw)) &&
> > +	    XE_WARN_ON(match_engine_lrc(found, lrc_hw) < 0)) {
> > +		xe_exec_queue_put(found);
> > +		return ERR_PTR(-ENOENT);
> > +	}
> > +
> > +	return found;
> > +}
> > +
> > +static int send_attention_event(struct xe_eudebug *d, struct xe_exec_queue *q, int lrc_idx)
> > +{
> > +	struct xe_eudebug_event_eu_attention *ea;
> > +	struct xe_eudebug_event *event;
> > +	int h_c, h_queue, h_lrc;
> > +	u32 size = xe_gt_eu_attention_bitmap_size(q->gt);
> > +	u32 sz = struct_size(ea, bitmask, size);
> > +	int ret;
> > +
> > +	XE_WARN_ON(lrc_idx < 0 || lrc_idx >= q->width);
> > +
> > +	h_c = find_handle(d->res, XE_EUDEBUG_RES_TYPE_CLIENT, q->vm->xef);
> > +	if (h_c < 0)
> > +		return h_c;
> > +
> > +	h_queue = find_handle(d->res, XE_EUDEBUG_RES_TYPE_EXEC_QUEUE, q);
> > +	if (h_queue < 0)
> > +		return h_queue;
> > +
> > +	h_lrc = find_handle(d->res, XE_EUDEBUG_RES_TYPE_LRC, q->lrc[lrc_idx]);
> > +	if (h_lrc < 0)
> > +		return h_lrc;
> > +
> > +	event = __xe_eudebug_create_event(d, 0, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> > +					  DRM_XE_EUDEBUG_EVENT_STATE_CHANGE, sz, GFP_KERNEL);
> > +
> > +	if (!event)
> > +		return -ENOSPC;
> > +
> > +	ea = cast_event(ea, event);
> > +	write_member(struct drm_xe_eudebug_event_eu_attention, ea, client_handle, (u64)h_c);
> > +	write_member(struct drm_xe_eudebug_event_eu_attention, ea, exec_queue_handle, (u64)h_queue);
> > +	write_member(struct drm_xe_eudebug_event_eu_attention, ea, lrc_handle, (u64)h_lrc);
> > +	write_member(struct drm_xe_eudebug_event_eu_attention, ea, bitmask_size, size);
> > +
> > +	mutex_lock(&d->eu_lock);
> > +	event->seqno = atomic_long_inc_return(&d->events.seqno);
> > +	ret = xe_gt_eu_attention_bitmap(q->gt, &ea->bitmask[0], ea->bitmask_size);
> > +	mutex_unlock(&d->eu_lock);
> > +
> > +	if (ret)
> > +		return ret;
> > +
> > +	return xe_eudebug_queue_event(d, event);
> > +}
> > +
> > +
> > +static int xe_send_gt_attention(struct xe_gt *gt)
> > +{
> > +	struct xe_eudebug *d;
> > +	struct xe_exec_queue *q;
> > +	int ret, lrc_idx;
> > +
> > +	if (list_empty_careful(&gt_to_xe(gt)->eudebug.list))
> > +		return -ENOTCONN;
> > +
> > +	q = runalone_active_queue_get(gt, &lrc_idx);
> > +	if (IS_ERR(q))
> > +		return PTR_ERR(q);
> > +
> > +	d = xe_eudebug_get(q->vm->xef);
> > +	if (!d) {
> > +		ret = -ENOTCONN;
> > +		goto err_exec_queue_put;
> > +	}
> > +
> > +	if (!completion_done(&d->discovery)) {
> > +		eu_dbg(d, "discovery not yet done\n");
> > +		ret = -EBUSY;
> > +		goto err_eudebug_put;
> > +	}
> > +
> > +	ret = send_attention_event(d, q, lrc_idx);
> > +	if (ret)
> > +		xe_eudebug_disconnect(d, ret);
> > +
> > +err_eudebug_put:
> > +	xe_eudebug_put(d);
> > +err_exec_queue_put:
> > +	xe_exec_queue_put(q);
> > +
> > +	return ret;
> > +}
> > +
> > +static int xe_eudebug_handle_gt_attention(struct xe_gt *gt)
> > +{
> > +	int ret;
> > +
> > +	ret = xe_gt_eu_threads_needing_attention(gt);
> > +	if (ret <= 0)
> > +		return ret;
> > +
> > +	ret = xe_send_gt_attention(gt);
> > +
> > +	/* Discovery in progress, fake it */
> > +	if (ret == -EBUSY)
> > +		return 0;
> > +
> > +	return ret;
> > +}
> > +
> > +#define XE_EUDEBUG_ATTENTION_INTERVAL 100
> > +static void attention_scan_fn(struct work_struct *work)
> > +{
> > +	struct xe_device *xe = container_of(work, typeof(*xe), eudebug.attention_scan.work);
> > +	long delay = msecs_to_jiffies(XE_EUDEBUG_ATTENTION_INTERVAL);
> > +	struct xe_gt *gt;
> > +	u8 gt_id;
> > +
> > +	if (list_empty_careful(&xe->eudebug.list))
> > +		delay *= 10;
> > +
> > +	if (delay >= HZ)
> > +		delay = round_jiffies_up_relative(delay);
> > +
> > +	if (pm_runtime_active(xe->drm.dev)) {
> > +		for_each_gt(gt, xe, gt_id) {
> > +			int ret;
> > +
> > +			ret = xe_eudebug_handle_gt_attention(gt);
> > +			if (ret) {
> > +				// TODO: error capture
> > +				drm_info(&gt_to_xe(gt)->drm,
> > +					 "gt:%d unable to handle eu attention ret=%d\n",
> > +					 gt_id, ret);
> > +
> > +				xe_gt_reset_async(gt);
> > +			}
> > +		}
> > +	}
> > +
> > +	schedule_delayed_work(&xe->eudebug.attention_scan, delay);
> > +}
> > +
> > +static void attention_scan_cancel(struct xe_device *xe)
> > +{
> > +	cancel_delayed_work_sync(&xe->eudebug.attention_scan);
> > +}
> > +
> > +static void attention_scan_flush(struct xe_device *xe)
> > +{
> > +	mod_delayed_work(system_wq, &xe->eudebug.attention_scan, 0);
> > +}
> > +
> >  static void discovery_work_fn(struct work_struct *work);
> >  
> >  static int
> > @@ -877,6 +1252,7 @@ xe_eudebug_connect(struct xe_device *xe,
> >  
> >  	kref_init(&d->ref);
> >  	spin_lock_init(&d->connection.lock);
> > +	mutex_init(&d->eu_lock);
> >  	init_waitqueue_head(&d->events.write_done);
> >  	init_waitqueue_head(&d->events.read_done);
> >  	init_completion(&d->discovery);
> > @@ -903,6 +1279,7 @@ xe_eudebug_connect(struct xe_device *xe,
> >  
> >  	kref_get(&d->ref);
> >  	queue_work(xe->eudebug.ordered_wq, &d->discovery_work);
> > +	attention_scan_flush(xe);
> >  
> >  	eu_dbg(d, "connected session %lld", d->session);
> >  
> > @@ -979,12 +1356,22 @@ void xe_eudebug_init(struct xe_device *xe)
> >  {
> >  	spin_lock_init(&xe->eudebug.lock);
> >  	INIT_LIST_HEAD(&xe->eudebug.list);
> > +	INIT_DELAYED_WORK(&xe->eudebug.attention_scan, attention_scan_fn);
> >  
> >  	xe->eudebug.available = true;
> >  }
> >  
> > +void xe_eudebug_init_late(struct xe_device *xe)
> > +{
> > +	if (!xe->eudebug.available)
> > +		return;
> > +
> > +	attention_scan_flush(xe);
> > +}
> > +
> >  void xe_eudebug_fini(struct xe_device *xe)
> >  {
> > +	attention_scan_cancel(xe);
> >  	xe_assert(xe, list_empty_careful(&xe->eudebug.list));
> >  }
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_eudebug.h b/drivers/gpu/drm/xe/xe_eudebug.h
> > index ac89a3d1ee1d..1e233c4683d6 100644
> > --- a/drivers/gpu/drm/xe/xe_eudebug.h
> > +++ b/drivers/gpu/drm/xe/xe_eudebug.h
> > @@ -18,6 +18,7 @@ int xe_eudebug_connect_ioctl(struct drm_device *dev,
> >  			     struct drm_file *file);
> >  
> >  void xe_eudebug_init(struct xe_device *xe);
> > +void xe_eudebug_init_late(struct xe_device *xe);
> >  void xe_eudebug_fini(struct xe_device *xe);
> >  void xe_eudebug_init_hw_engine(struct xe_hw_engine *hwe);
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_eudebug_types.h b/drivers/gpu/drm/xe/xe_eudebug_types.h
> > index 6e3c23023933..16667b4dfe45 100644
> > --- a/drivers/gpu/drm/xe/xe_eudebug_types.h
> > +++ b/drivers/gpu/drm/xe/xe_eudebug_types.h
> > @@ -105,6 +105,9 @@ struct xe_eudebug {
> >  	/** @discovery_work: worker to discover resources for target_task */
> >  	struct work_struct discovery_work;
> >  
> > +	/** eu_lock: guards operations on eus (eu thread control and attention) */
> > +	struct mutex eu_lock;
> > +
> >  	/** @events: kfifo queue of to-be-delivered events */
> >  	struct {
> >  		/** @lock: guards access to fifo */
> > @@ -202,4 +205,33 @@ struct xe_eudebug_event_exec_queue {
> >  	u64 lrc_handle[];
> >  };
> >  
> > +/**
> > + * struct xe_eudebug_event_eu_attention - Internal event for EU attention
> > + */
> > +struct xe_eudebug_event_eu_attention {
> > +	/** @base: base event */
> > +	struct xe_eudebug_event base;
> > +
> > +	/** @client_handle: client for the attention */
> > +	u64 client_handle;
> > +
> > +	/** @exec_queue_handle: handle of exec_queue which raised attention */
> > +	u64 exec_queue_handle;
> > +
> > +	/** @lrc_handle: lrc handle of the workload which raised attention */
> > +	u64 lrc_handle;
> > +
> > +	/** @flags: eu attention event flags, currently MBZ */
> > +	u32 flags;
> > +
> > +	/** @bitmask_size: size of the bitmask, specific to device */
> > +	u32 bitmask_size;
> > +
> > +	/**
> > +	 * @bitmask: reflects threads currently signalling attention,
> > +	 * starting from natural hardware order of DSS=0, eu=0
> > +	 */
> > +	u8 bitmask[];
> > +};
> > +
> >  #endif
> > diff --git a/drivers/gpu/drm/xe/xe_gt_debug.c b/drivers/gpu/drm/xe/xe_gt_debug.c
> > new file mode 100644
> > index 000000000000..04d2d43ce249
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_gt_debug.c
> > @@ -0,0 +1,152 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2023 Intel Corporation
> > + */
> > +
> > +#include "regs/xe_gt_regs.h"
> > +#include "xe_device.h"
> > +#include "xe_force_wake.h"
> > +#include "xe_gt.h"
> > +#include "xe_gt_topology.h"
> > +#include "xe_gt_debug.h"
> > +#include "xe_gt_mcr.h"
> > +#include "xe_pm.h"
> > +#include "xe_macros.h"
> > +
> > +static int xe_gt_foreach_dss_group_instance(struct xe_gt *gt,
> > +					    int (*fn)(struct xe_gt *gt,
> > +						      void *data,
> > +						      u16 group,
> > +						      u16 instance),
> > +					    void *data)
> > +{
> > +	const enum xe_force_wake_domains fw_domains = XE_FW_GT | XE_FW_RENDER;
> > +	unsigned int dss;
> > +	u16 group, instance;
> > +	int ret;
> > +
> > +	xe_pm_runtime_get(gt_to_xe(gt));
> > +	ret = xe_force_wake_get(gt_to_fw(gt), fw_domains);
> > +	if (ret)
> > +		goto pm_runtime_put;
> > +
> > +	for_each_dss_steering(dss, gt, group, instance) {
> > +		ret = fn(gt, data, group, instance);
> > +		if (ret)
> > +			break;
> > +	}
> > +
> > +	xe_force_wake_put(gt_to_fw(gt), fw_domains);
> > +pm_runtime_put:
> > +	xe_pm_runtime_put(gt_to_xe(gt));
> > +
> > +	return ret;
> > +}
> > +
> > +static int read_first_attention_mcr(struct xe_gt *gt, void *data,
> > +				    u16 group, u16 instance)
> > +{
> > +	unsigned int row;
> > +
> > +	for (row = 0; row < 2; row++) {
> > +		u32 val;
> > +
> > +		val = xe_gt_mcr_unicast_read(gt, TD_ATT(row), group, instance);
> > +
> > +		if (val)
> > +			return 1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +#define MAX_EUS_PER_ROW 4u
> > +#define MAX_THREADS 8u
> > +
> > +/**
> > + * xe_gt_eu_attention_bitmap_size - query size of the attention bitmask
> > + *
> > + * @gt: pointer to struct xe_gt
> > + *
> > + * Return: size in bytes.
> > + */
> > +int xe_gt_eu_attention_bitmap_size(struct xe_gt *gt)
> > +{
> > +	xe_dss_mask_t dss_mask;
> > +
> > +	bitmap_or(dss_mask, gt->fuse_topo.c_dss_mask,
> > +		  gt->fuse_topo.g_dss_mask, XE_MAX_DSS_FUSE_BITS);
> > +
> > +	return  bitmap_weight(dss_mask, XE_MAX_DSS_FUSE_BITS) *
> > +		TD_EU_ATTENTION_MAX_ROWS * MAX_THREADS *
> > +		MAX_EUS_PER_ROW / 8;
> > +}
> > +
> > +struct attn_read_iter {
> > +	struct xe_gt *gt;
> > +	unsigned int i;
> > +	unsigned int size;
> > +	u8 *bits;
> > +};
> > +
> > +static int read_eu_attentions_mcr(struct xe_gt *gt, void *data,
> > +				  u16 group, u16 instance)
> > +{
> > +	struct attn_read_iter * const iter = data;
> > +	unsigned int row;
> > +
> > +	for (row = 0; row < TD_EU_ATTENTION_MAX_ROWS; row++) {
> > +		u32 val;
> > +
> > +		if (iter->i >= iter->size)
> > +			return 0;
> > +
> > +		XE_WARN_ON(iter->i + sizeof(val) > xe_gt_eu_attention_bitmap_size(gt));
> > +
> > +		val = xe_gt_mcr_unicast_read(gt, TD_ATT(row), group, instance);
> > +
> > +
> > +		memcpy(&iter->bits[iter->i], &val, sizeof(val));
> > +		iter->i += sizeof(val);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * xe_gt_eu_attention_bitmap - query host attention
> > + *
> > + * @gt: pointer to struct xe_gt
> > + *
> > + * Return: 0 on success, negative otherwise.
> > + */
> > +int xe_gt_eu_attention_bitmap(struct xe_gt *gt, u8 *bits,
> > +			      unsigned int bitmap_size)
> > +{
> > +	struct attn_read_iter iter = {
> > +		.gt = gt,
> > +		.i = 0,
> > +		.size = bitmap_size,
> > +		.bits = bits
> > +	};
> > +
> > +	return xe_gt_foreach_dss_group_instance(gt, read_eu_attentions_mcr, &iter);
> > +}
> > +
> > +/**
> > + * xe_gt_eu_threads_needing_attention - Query host attention
> > + *
> > + * @gt: pointer to struct xe_gt
> > + *
> > + * Return: 1 if threads waiting host attention, 0 otherwise.
> > + */
> > +int xe_gt_eu_threads_needing_attention(struct xe_gt *gt)
> > +{
> > +	int err;
> > +
> > +	err = xe_gt_foreach_dss_group_instance(gt, read_first_attention_mcr, NULL);
> > +
> > +	XE_WARN_ON(err < 0);
> > +
> > +	return err < 0 ? 0 : err;
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_gt_debug.h b/drivers/gpu/drm/xe/xe_gt_debug.h
> > new file mode 100644
> > index 000000000000..3f13dbb17a5f
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_gt_debug.h
> > @@ -0,0 +1,21 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2023 Intel Corporation
> > + */
> > +
> > +#ifndef __XE_GT_DEBUG_
> > +#define __XE_GT_DEBUG_
> > +
> > +#define TD_EU_ATTENTION_MAX_ROWS 2u
> > +
> > +#include "xe_gt_types.h"
> > +
> > +#define XE_GT_ATTENTION_TIMEOUT_MS 100
> > +
> > +int xe_gt_eu_threads_needing_attention(struct xe_gt *gt);
> > +
> > +int xe_gt_eu_attention_bitmap_size(struct xe_gt *gt);
> > +int xe_gt_eu_attention_bitmap(struct xe_gt *gt, u8 *bits,
> > +			      unsigned int bitmap_size);
> > +
> > +#endif
> > diff --git a/include/uapi/drm/xe_drm_eudebug.h b/include/uapi/drm/xe_drm_eudebug.h
> > index 25dddb8b22f4..453269ac8307 100644
> > --- a/include/uapi/drm/xe_drm_eudebug.h
> > +++ b/include/uapi/drm/xe_drm_eudebug.h
> > @@ -27,13 +27,15 @@ struct drm_xe_eudebug_event {
> >  #define DRM_XE_EUDEBUG_EVENT_OPEN		2
> >  #define DRM_XE_EUDEBUG_EVENT_VM			3
> >  #define DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE		4
> > -#define DRM_XE_EUDEBUG_EVENT_MAX_EVENT		DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE
> > +#define DRM_XE_EUDEBUG_EVENT_EU_ATTENTION	5
> > +#define DRM_XE_EUDEBUG_EVENT_MAX_EVENT		DRM_XE_EUDEBUG_EVENT_EU_ATTENTION
> >  
> >  	__u16 flags;
> >  #define DRM_XE_EUDEBUG_EVENT_CREATE		(1 << 0)
> >  #define DRM_XE_EUDEBUG_EVENT_DESTROY		(1 << 1)
> >  #define DRM_XE_EUDEBUG_EVENT_STATE_CHANGE	(1 << 2)
> >  #define DRM_XE_EUDEBUG_EVENT_NEED_ACK		(1 << 3)
> > +
> >  	__u64 seqno;
> >  	__u64 reserved;
> >  };
> > @@ -62,6 +64,17 @@ struct drm_xe_eudebug_event_exec_queue {
> >  	__u64 lrc_handle[];
> >  };
> >  
> > +struct drm_xe_eudebug_event_eu_attention {
> > +	struct drm_xe_eudebug_event base;
> > +
> > +	__u64 client_handle;
> > +	__u64 exec_queue_handle;
> > +	__u64 lrc_handle;
> > +	__u32 flags;
> > +	__u32 bitmask_size;
> > +	__u8 bitmask[];
> > +};
> > +
> >  #if defined(__cplusplus)
> >  }
> >  #endif
> > -- 
> > 2.34.1
> > 


