[Intel-xe] [PATCH 1/2] drm/xe: Set default MOCS value for cs instructions

Matt Roper matthew.d.roper at intel.com
Mon Apr 24 22:46:20 UTC 2023


On Fri, Apr 14, 2023 at 09:09:05PM -0700, José Roberto de Souza wrote:
> CS instructions that dont have a explicit MOCS field will use this
> default MOCS value.
> 
> To do this, it was necessary to initialize part of the mocs earlier
> and add new function that loads another array of rtp entries set
> during run-time.
> 
> This is still missing to handle of mocs read for platforms with
> HAS_L3_CCS_READ(aka PVC).
> 
> This was mainly copied from i915 source code.
> 
> Bspec: 45826
> Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> ---
>  drivers/gpu/drm/xe/regs/xe_engine_regs.h | 16 +++++++++++
>  drivers/gpu/drm/xe/xe_gt.c               |  2 ++
>  drivers/gpu/drm/xe/xe_mocs.c             | 11 ++++++--
>  drivers/gpu/drm/xe/xe_mocs.h             |  1 +
>  drivers/gpu/drm/xe/xe_wa.c               | 36 ++++++++++++++++++++++++
>  5 files changed, 64 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> index 2aa67d001c34b..cc874348f76bc 100644
> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> @@ -89,6 +89,22 @@
>  #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
>  #define	  EL_CTRL_LOAD				REG_BIT(0)
>  
> +#define RING_CMD_CCTL(base)			_MMIO((base) + 0xc4)

It looks like this is at the wrong spot in the header.  An offset of
0xc4 should be much higher up.

> +/*
> + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
> + * The lsb of each can be considered a separate enabling bit for encryption.
> + * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
> + * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
> + * 15:14 == Reserved => 31:30 are set to 0.
> + */
> +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7)
> +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0)

I'd be inclined to just define the field as the specific bits we
actually need to set in the driver, even if the way it's represented in
the bspec is a bit confusing on current platforms (on future platforms
they've cleaned this up doc-wise).  I.e.,

  #define CMD_CCTL_WRITE_MOCS_IDX_MASK          REG_GENMASK(13, 8)
  #define CMD_CCTL_READ_MOCS_IDX_MASK           REG_GENMASK(6, 1)

Then people don't get confused about when they do/don't need to left
shift the MOCS index; everything in the kernel driver can just use the
index.

> +#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \
> +			    CMD_CCTL_READ_OVERRIDE_MASK)
> +#define CMD_CCTL_MOCS_OVERRIDE(write, read)				      \
> +		(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
> +		 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))

I know this comes from i915, but personally I don't see much value in
these two macros; it's easy enough (and more transparent) to just OR the
read and write mask/values together at the point they're used.

> +
>  #define VDBOX_CGCTL3F10(base)			_MMIO((base) + 0x3f10)
>  #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
>  
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 4186f7f0d42f5..126434b4c3d7b 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -390,6 +390,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
>  	/* Rerun MCR init as we now have hw engine list */
>  	xe_gt_mcr_init(gt);
>  
> +	xe_mocs_init_early(gt);
> +
>  	err = xe_hw_engines_init_early(gt);
>  	if (err)
>  		goto err_force_wake;
> diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
> index e09c6242aafc0..b1a522bd1b418 100644
> --- a/drivers/gpu/drm/xe/xe_mocs.c
> +++ b/drivers/gpu/drm/xe/xe_mocs.c
> @@ -517,6 +517,15 @@ static void init_l3cc_table(struct xe_gt *gt,
>  	}
>  }
>  
> +void xe_mocs_init_early(struct xe_gt *gt)
> +{
> +	struct xe_mocs_info table;
> +
> +	get_mocs_settings(gt->xe, &table);
> +	gt->mocs.uc_index = table.uc_index;
> +	gt->mocs.wb_index = table.wb_index;
> +}
> +
>  void xe_mocs_init(struct xe_gt *gt)
>  {
>  	struct xe_mocs_info table;
> @@ -527,8 +536,6 @@ void xe_mocs_init(struct xe_gt *gt)
>  	 */
>  	flags = get_mocs_settings(gt->xe, &table);
>  	mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
> -	gt->mocs.uc_index = table.uc_index;
> -	gt->mocs.wb_index = table.wb_index;
>  
>  	if (flags & HAS_GLOBAL_MOCS)
>  		__init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg);
> diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
> index 63500a1d6660a..25f7b35a76daf 100644
> --- a/drivers/gpu/drm/xe/xe_mocs.h
> +++ b/drivers/gpu/drm/xe/xe_mocs.h
> @@ -11,6 +11,7 @@
>  struct xe_engine;
>  struct xe_gt;
>  
> +void xe_mocs_init_early(struct xe_gt *gt);
>  void xe_mocs_init(struct xe_gt *gt);
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
> index a7d681b7538d4..5ef3561101466 100644
> --- a/drivers/gpu/drm/xe/xe_wa.c
> +++ b/drivers/gpu/drm/xe/xe_wa.c

I don't think this is the right file (or function name) for this.
i915's register programming infrastructure referred to everything as a
"workaround" even though we wound up using it for a bunch of other
non-WA things in the end.  In Xe the RTP/SR infrastructure can be used
from multiple places, so we're keeping xe_wa.c focused on real, official
workarounds and putting things that are more tuning/policy focused in
xe_tuning.c


Matt

> @@ -631,6 +631,41 @@ void xe_wa_process_gt(struct xe_gt *gt)
>  }
>  EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
>  
> +/**
> + * xe_wa_process_runtime_was_engine - process engine runtime workarounds
> + *
> + * For workarounds that program values that are only available in run-time.
> + */
> +static void
> +xe_wa_process_runtime_was_engine(struct xe_reg_sr *sr,
> +				 struct xe_gt *gt, struct xe_hw_engine *hwe)
> +{
> +	u8 mocs_w = gt->mocs.uc_index;
> +	/* TODO: missing handling of HAS_L3_CCS_READ platforms */
> +	u8 mocs_r = gt->mocs.uc_index;
> +	struct xe_rtp_entry engine_was[] = {
> +		/*
> +		 * RING_CMD_CCTL specifies the default MOCS entry that will be
> +		 * used by the command streamer when executing commands that
> +		 * don't have a way to explicitly specify a MOCS setting.
> +		 * The default should usually reference whichever MOCS entry
> +		 * corresponds to uncached behavior, although use of a WB cached
> +		 * entry is recommended by the spec in certain circumstances on
> +		 * specific platforms.
> +		 */
> +		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
> +		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
> +		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
> +					   CMD_CCTL_MOCS_MASK,
> +					   CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r),
> +					   XE_RTP_ACTION_FLAG(MASKED_REG, ENGINE_BASE))),
> +		},
> +		{}
> +	};
> +
> +	xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
> +}
> +
>  /**
>   * xe_wa_process_engine - process engine workaround table
>   * @hwe: engine instance to process workarounds for
> @@ -642,6 +677,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
>  void xe_wa_process_engine(struct xe_hw_engine *hwe)
>  {
>  	xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
> +	xe_wa_process_runtime_was_engine(&hwe->reg_sr, hwe->gt, hwe);
>  }
>  
>  /**
> -- 
> 2.40.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list