[Intel-xe] [RFC 04/25] drm/xe/eudebug: hw enablement for eudebug

Grzegorzek, Dominik dominik.grzegorzek at intel.com
Tue Nov 14 13:38:36 UTC 2023


Hi Matt,

Having read your comments, I realized some more inaccurasies in this patch.

On Mon, 2023-11-13 at 15:31 -0800, Matt Roper wrote:

> On Mon, Nov 06, 2023 at 01:18:24PM +0200, Mika Kuoppala wrote:
> > From: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> > 
> > In order to turn on debug capabilities, (i.e. breakpoints), TD_CTL
> > and some other registers needs to be programmed. Implement eudebug
> > mode enabling including eudebug related workarounds.
> > 
> > Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> > ---
> >  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 14 ++++++++
> >  drivers/gpu/drm/xe/xe_eudebug.c      | 48 ++++++++++++++++++++++++++++
> >  drivers/gpu/drm/xe/xe_eudebug.h      |  2 ++
> >  drivers/gpu/drm/xe/xe_hw_engine.c    |  2 ++
> >  4 files changed, 66 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > index 902c60543de0..a1bed8b9171a 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > @@ -60,6 +60,10 @@
> >  #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
> >  #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
> >  
> > +#define CS_DEBUG_MODE2				XE_REG(0x20d8, XE_REG_OPTION_MASKED)
> > +#define   INST_STATE_CACHE_INVALIDATE		REG_BIT(6)
> > +#define   GLOBAL_DEBUG_ENABLE			REG_BIT(5)
> > +
> >  #define FF_SLICE_CS_CHICKEN1			XE_REG(0x20e0, XE_REG_OPTION_MASKED)
> >  #define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
> >  
> > @@ -334,6 +338,14 @@
> >  #define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED)
> >  #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
> >  
> > +#define TD_CTL					XE_REG_MCR(0xe400)
> > +#define   TD_CTL_FEH_AND_FEE_ENABLE		REG_BIT(7) /* forced halt and exception */
> > +#define   TD_CTL_FORCE_EXTERNAL_HALT		REG_BIT(6)
> > +#define   TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE	REG_BIT(4)
> > +#define   TD_CTL_FORCE_EXCEPTION		REG_BIT(3)
> > +#define   TD_CTL_BREAKPOINT_ENABLE		REG_BIT(2)
> > +#define   TD_CTL_GLOBAL_DEBUG_ENABLE		REG_BIT(0) /* XeHP */
> > +
> >  #define CACHE_MODE_SS				XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED)
> >  #define   ENABLE_EU_COUNT_FOR_TDL_FLUSH		REG_BIT(10)
> >  #define   DISABLE_ECC				REG_BIT(5)
> > @@ -355,11 +367,13 @@
> >  #define   UGM_BACKUP_MODE			REG_BIT(13)
> >  #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
> >  #define   EARLY_EOT_DIS				REG_BIT(1)
> > +#define   STALL_DOP_GATING_DISABLE		REG_BIT(5)
> >  
> >  #define ROW_CHICKEN2				XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
> >  #define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
> >  #define   DISABLE_EARLY_READ			REG_BIT(14)
> >  #define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
> > +#define   XEHPC_DISABLE_BTB			REG_BIT(11)
> >  #define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
> >  #define   DISABLE_DOP_GATING			REG_BIT(0)
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
> > index 3598c0f352fe..99939176bbb9 100644
> > --- a/drivers/gpu/drm/xe/xe_eudebug.c
> > +++ b/drivers/gpu/drm/xe/xe_eudebug.c
> > @@ -14,10 +14,15 @@
> >  #include <drm/drm_managed.h>
> >  #include <uapi/drm/xe_drm_tmp.h>
> >  
> > +#include "regs/xe_gt_regs.h"
> >  #include "xe_device.h"
> > +#include "xe_gt.h"
> >  #include "xe_eudebug_types.h"
> >  #include "xe_exec_queue_types.h"
> > +#include "xe_module.h"
> > +#include "xe_rtp.h"
> >  #include "xe_vm.h"
> > +#include "xe_wa.h"
> >  
> >  /*
> >   * If there is no event being read in this time (for example gdb stuck)
> > @@ -824,6 +829,49 @@ int xe_eudebug_connect_ioctl(struct drm_device *dev,
> >  	return ret;
> >  }
> >  

> > +#undef XE_REG_MCR
> > +#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
> > +
> > +void xe_eudebug_init_hw_engine(struct xe_hw_engine *hwe)
> > +{
> > +	const struct xe_rtp_entry_sr eudebug_was[] = {
> > +		{ XE_RTP_NAME("GlobalDebugEnable"),
Should be "14015447018".

> > +		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
> > +			       ENGINE_CLASS(RENDER)),
> > +		  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE2, GLOBAL_DEBUG_ENABLE))
> > +		},
> > +		{ XE_RTP_NAME("TdCtlDebugEnable"),
I should split it into two entries. One for td clt debug enable and rest of "14015447018".
> > +		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
> > +			       FUNC(xe_rtp_match_first_render_or_compute)),
> > +		  XE_RTP_ACTIONS(SET(TD_CTL,
> > +				     TD_CTL_BREAKPOINT_ENABLE |
> > +				     TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE |
> > +				     TD_CTL_FEH_AND_FEE_ENABLE))
> > +		},
> > +		{ XE_RTP_NAME("TdCtlGlobalDebugEnable"),
XE_RTP_NAME("14015447018") and I should close the range. :)

> > +		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1250, XE_RTP_END_VERSION_UNDEFINED),
> > +			       FUNC(xe_rtp_match_first_render_or_compute)),
> > +		  XE_RTP_ACTIONS(SET(TD_CTL, TD_CTL_GLOBAL_DEBUG_ENABLE))
> > +		},
> > +		{ XE_RTP_NAME("22015693276"),
> > +		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1250, XE_RTP_END_VERSION_UNDEFINED),
> 
> The actual workaround number here is 18022722726, and the workaround
> database indicates that it is only necessary for PVC (version 12.60).
> Is there some other workaround that tells us we also need to apply this
> on DG2/ATS-M (12.55) and all later platforms as well?

Right, 18022722726. The original bug was reported on DG2 and I'm sure that we need this on 12.55.
Database indeed claims differently. I will try to have it corrected there to sort it out. Future
platrforms indeed may not be affected.

> 
> > +			       FUNC(xe_rtp_match_first_render_or_compute)),
> > +		  XE_RTP_ACTIONS(SET(ROW_CHICKEN, STALL_DOP_GATING_DISABLE))
> > +		},
> > +		{ XE_RTP_NAME("14015527279"),
> 
> The proper workaround lineage number here is 14015474168.
Yes, you are right. 
> 
> > +		  XE_RTP_RULES(PLATFORM(PVC),
> > +			       FUNC(xe_rtp_match_first_render_or_compute)),
> > +		  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, XEHPC_DISABLE_BTB))
> > +		},
> > +		{}
> > +	};
> > +	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> > +	struct xe_device *xe = gt_to_xe(hwe->gt);
> > +
> > +	if (xe->eudebug.available)
> > +		xe_rtp_process_to_sr(&ctx, eudebug_was, &hwe->reg_sr);
> 
> eudebug.available is always set if there weren't any errors (e.g.,
> memory allocation failures) during general init, right?  So even if the
> user has no plans to use EU debugging, this flag is still on, so all
> this extra programming happens anyway?  It seems like a lot of these
> things are programming that should always happen when EU debugging is
> actually being utilized, unless I'm misunderstanding.
> 
> 
> Matt
Your understanding is correct. We took simplistic approach to enable EU Debugger always. We 
were developing it on topic branch, where it could be enabled always. Our ultimate goal
and definetly sth needed before an upstream merge is an sysfs entry, which will allow to enable 
eudebug dynamically. I should have mentioned it in the commit message, sorry. 

Thanks for your comments!

~Dominik

> 
> > +}
> > +
> >  void xe_eudebug_init(struct xe_device *xe)
> >  {
> >  	int ret;
> > diff --git a/drivers/gpu/drm/xe/xe_eudebug.h b/drivers/gpu/drm/xe/xe_eudebug.h
> > index 44b20549eb6d..ac89a3d1ee1d 100644
> > --- a/drivers/gpu/drm/xe/xe_eudebug.h
> > +++ b/drivers/gpu/drm/xe/xe_eudebug.h
> > @@ -11,6 +11,7 @@ struct xe_device;
> >  struct xe_file;
> >  struct xe_vm;
> >  struct xe_exec_queue;
> > +struct xe_hw_engine;
> >  
> >  int xe_eudebug_connect_ioctl(struct drm_device *dev,
> >  			     void *data,
> > @@ -18,6 +19,7 @@ int xe_eudebug_connect_ioctl(struct drm_device *dev,
> >  
> >  void xe_eudebug_init(struct xe_device *xe);
> >  void xe_eudebug_fini(struct xe_device *xe);
> > +void xe_eudebug_init_hw_engine(struct xe_hw_engine *hwe);
> >  
> >  void xe_eudebug_file_open(struct xe_file *xef);
> >  void xe_eudebug_file_close(struct xe_file *xef);
> > diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> > index b5b084590888..59ce9574b1c5 100644
> > --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> > +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> > @@ -13,6 +13,7 @@
> >  #include "xe_assert.h"
> >  #include "xe_bo.h"
> >  #include "xe_device.h"
> > +#include "xe_eudebug.h"
> >  #include "xe_execlist.h"
> >  #include "xe_force_wake.h"
> >  #include "xe_gt.h"
> > @@ -409,6 +410,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
> >  	xe_tuning_process_engine(hwe);
> >  	xe_wa_process_engine(hwe);
> >  	hw_engine_setup_default_state(hwe);
> > +	xe_eudebug_init_hw_engine(hwe);
> >  
> >  	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
> >  	xe_reg_whitelist_process_engine(hwe);
> > -- 
> > 2.34.1
> > 
> 



More information about the Intel-xe mailing list