[PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340

Thomas Hellström thomas.hellstrom at linux.intel.com
Wed Jul 3 09:24:37 UTC 2024


Hi, Matt

On Tue, 2024-07-02 at 16:06 +0100, Matthew Auld wrote:
> This involves enabling l2 caching of host side memory access to VRAM
> through the CPU BAR. The main fallout here is with display since VRAM
> writes from CPU can now be cached in GPU l2, and display is never
> coherent with caches, so it needs various manual flushing. In the case
> of FBC we disable it due to complications in getting this to work
> correctly (in a later patch).

What about user-space accesses to framebuffers?

/Thomas


> 
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Vinod Govindapillai <vinod.govindapillai at intel.com>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
> ---
>  drivers/gpu/drm/xe/Makefile                |  2 +
>  drivers/gpu/drm/xe/display/xe_dsb_buffer.c |  8 ++++
>  drivers/gpu/drm/xe/display/xe_fb_pin.c     |  3 ++
>  drivers/gpu/drm/xe/regs/xe_gt_regs.h       |  8 ++++
>  drivers/gpu/drm/xe/xe_device.c             | 30 ++++++++++++
>  drivers/gpu/drm/xe/xe_device.h             |  1 +
>  drivers/gpu/drm/xe/xe_gt.c                 | 54
> ++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_pat.c                | 11 ++++-
>  drivers/gpu/drm/xe/xe_wa_oob.rules         |  1 +
>  9 files changed, 117 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/Makefile
> b/drivers/gpu/drm/xe/Makefile
> index b1e03bfe4a68..970c5c09e20a 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c
> $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>  
>  uses_generated_oob := \
>  	$(obj)/xe_ggtt.o \
> +	$(obj)/xe_device.o \
>  	$(obj)/xe_gsc.o \
>  	$(obj)/xe_gt.o \
>  	$(obj)/xe_guc.o \
>  	$(obj)/xe_guc_ads.o \
>  	$(obj)/xe_guc_pc.o \
>  	$(obj)/xe_migrate.o \
> +	$(obj)/xe_pat.o \
>  	$(obj)/xe_ring_ops.o \
>  	$(obj)/xe_vm.o \
>  	$(obj)/xe_wa.o \
> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> index 9e860c61f4b3..ccd0d87d438a 100644
> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> @@ -7,6 +7,8 @@
>  #include "intel_display_types.h"
>  #include "intel_dsb_buffer.h"
>  #include "xe_bo.h"
> +#include "xe_device.h"
> +#include "xe_device_types.h"
>  #include "xe_gt.h"
>  
>  u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
> @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct
> intel_dsb_buffer *dsb_buf)
>  
>  void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32
> idx, u32 val)
>  {
> +	struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
> +
>  	iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
> +	xe_device_l2_flush(xe);
>  }
>  
>  u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
> @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer
> *dsb_buf, u32 idx)
>  
>  void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32
> idx, u32 val, size_t size)
>  {
> +	struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
> +
>  	WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf-
> >cmd_buf));
>  
>  	iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val,
> size);
> +	xe_device_l2_flush(xe);
>  }
>  
>  bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct
> intel_dsb_buffer *dsb_buf, size_t size)
> diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> index 423f367c7065..d7db44e79eaf 100644
> --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> @@ -10,6 +10,7 @@
>  #include "intel_fb.h"
>  #include "intel_fb_pin.h"
>  #include "xe_bo.h"
> +#include "xe_device.h"
>  #include "xe_ggtt.h"
>  #include "xe_gt.h"
>  #include "xe_pm.h"
> @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const
> struct intel_framebuffer *fb,
>  	if (ret)
>  		goto err_unpin;
>  
> +	/* Ensure DPT writes are flushed */
> +	xe_device_l2_flush(xe);
>  	return vma;
>  
>  err_unpin:
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index d44564bad009..fd9d94174efb 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -80,6 +80,9 @@
>  #define   LE_CACHEABILITY_MASK			REG_GENMASK(1, 0)
>  #define  
> LE_CACHEABILITY(value)		REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
>  
> +#define XE2_GAMREQSTRM_CTRL			XE_REG(0x4194)
> +#define   CG_DIS_CNTLBUS			REG_BIT(6)
> +
>  #define CCS_AUX_INV				XE_REG(0x4208)
>  
>  #define VD0_AUX_INV				XE_REG(0x4218)
> @@ -372,6 +375,11 @@
>  
>  #define XEHPC_L3CLOS_MASK(i)			XE_REG_MCR(0xb194 +
> (i) * 8)
>  
> +#define XE2_GLOBAL_INVAL			XE_REG(0xb404)
> +
> +#define SCRATCH1LPFC				XE_REG(0xb474)
> +#define   EN_L3_RW_CCS_CACHE_FLUSH		REG_BIT(0)
> +
>  #define XE2LPM_L3SQCREG5			XE_REG_MCR(0xb658)
>  
>  #define XE2_TDF_CTRL				XE_REG(0xb418)
> diff --git a/drivers/gpu/drm/xe/xe_device.c
> b/drivers/gpu/drm/xe/xe_device.c
> index cfda7cb5df2c..b0f79ef6bce1 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -54,6 +54,9 @@
>  #include "xe_vm.h"
>  #include "xe_vram.h"
>  #include "xe_wait_user_fence.h"
> +#include "xe_wa.h"
> +
> +#include <generated/xe_wa_oob.h>
>  
>  static int xe_file_open(struct drm_device *dev, struct drm_file
> *file)
>  {
> @@ -779,6 +782,11 @@ void xe_device_td_flush(struct xe_device *xe)
>  	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
>  		return;
>  
> +	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
> +		xe_device_l2_flush(xe);
> +		return;
> +	}
> +
>  	for_each_gt(gt, xe, id) {
>  		if (xe_gt_is_media_type(gt))
>  			continue;
> @@ -802,6 +810,28 @@ void xe_device_td_flush(struct xe_device *xe)
>  	}
>  }
>  
> +void xe_device_l2_flush(struct xe_device *xe)
> +{
> +	struct xe_gt *gt;
> +	int err;
> +
> +	gt = xe_root_mmio_gt(xe);
> +
> +	if (!XE_WA(gt, 16023588340))
> +		return;
> +
> +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> +	if (err)
> +		return;
> +
> +	xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
> +
> +	if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150,
> NULL, true))
> +		xe_gt_err_once(gt, "Global invalidation timeout\n");
> +
> +	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
>  u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
>  {
>  	return xe_device_has_flat_ccs(xe) ?
> diff --git a/drivers/gpu/drm/xe/xe_device.h
> b/drivers/gpu/drm/xe/xe_device.h
> index bb07f5669dbb..0a2a3e7fd402 100644
> --- a/drivers/gpu/drm/xe/xe_device.h
> +++ b/drivers/gpu/drm/xe/xe_device.h
> @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device
> *xe, u64 address);
>  u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64
> address);
>  
>  void xe_device_td_flush(struct xe_device *xe);
> +void xe_device_l2_flush(struct xe_device *xe);
>  
>  static inline bool xe_device_wedged(struct xe_device *xe)
>  {
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 29e8ea94d05e..006d3594ba55 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -11,6 +11,8 @@
>  #include <drm/xe_drm.h>
>  #include <generated/xe_wa_oob.h>
>  
> +#include <generated/xe_wa_oob.h>
> +
>  #include "instructions/xe_gfxpipe_commands.h"
>  #include "instructions/xe_mi_commands.h"
>  #include "regs/xe_gt_regs.h"
> @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
>  	gt->uc.guc.submission_state.enabled = false;
>  }
>  
> +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
> +{
> +	u32 reg;
> +	int err;
> +
> +	if (!XE_WA(gt, 16023588340))
> +		return;
> +
> +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> +	if (WARN_ON(err))
> +		return;
> +
> +	if (!xe_gt_is_media_type(gt)) {
> +		xe_mmio_write32(gt, SCRATCH1LPFC,
> EN_L3_RW_CCS_CACHE_FLUSH);
> +		reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
> +		reg |= CG_DIS_CNTLBUS;
> +		xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
> +	}
> +
> +	xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
> +	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
> +{
> +	u32 reg;
> +	int err;
> +
> +	if (!XE_WA(gt, 16023588340))
> +		return;
> +
> +	if (xe_gt_is_media_type(gt))
> +		return;
> +
> +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> +	if (WARN_ON(err))
> +		return;
> +
> +	reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
> +	reg &= ~CG_DIS_CNTLBUS;
> +	xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
> +
> +	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
>  /**
>   * xe_gt_remove() - Clean up the GT structures before driver removal
>   * @gt: the GT object
> @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
>  
>  	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
>  		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
> +
> +	xe_gt_disable_host_l2_vram(gt);
>  }
>  
>  static void gt_reset_worker(struct work_struct *w);
> @@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
>  
>  	xe_gt_mcr_init_early(gt);
>  	xe_pat_init(gt);
> +	xe_gt_enable_host_l2_vram(gt);
>  
>  	err = xe_uc_init(&gt->uc);
>  	if (err)
> @@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
>  
>  	xe_pat_init(gt);
>  
> +	xe_gt_enable_host_l2_vram(gt);
> +
>  	xe_gt_mcr_set_implicit_defaults(gt);
>  	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
>  
> @@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
>  
>  	xe_gt_idle_disable_pg(gt);
>  
> +	xe_gt_disable_host_l2_vram(gt);
> +
>  	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt),
> XE_FORCEWAKE_ALL));
>  	xe_gt_dbg(gt, "suspended\n");
>  
> diff --git a/drivers/gpu/drm/xe/xe_pat.c
> b/drivers/gpu/drm/xe/xe_pat.c
> index 4ee32ee1cc88..722278cc23fc 100644
> --- a/drivers/gpu/drm/xe/xe_pat.c
> +++ b/drivers/gpu/drm/xe/xe_pat.c
> @@ -7,6 +7,8 @@
>  
>  #include <drm/xe_drm.h>
>  
> +#include <generated/xe_wa_oob.h>
> +
>  #include "regs/xe_reg_defs.h"
>  #include "xe_assert.h"
>  #include "xe_device.h"
> @@ -15,6 +17,7 @@
>  #include "xe_gt_mcr.h"
>  #include "xe_mmio.h"
>  #include "xe_sriov.h"
> +#include "xe_wa.h"
>  
>  #define _PAT_ATS				0x47fc
>  #define
> _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
> @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
>  	if (GRAPHICS_VER(xe) == 20) {
>  		xe->pat.ops = &xe2_pat_ops;
>  		xe->pat.table = xe2_pat_table;
> -		xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
> +
> +		/* Wa_16023588340. XXX: Should use XE_WA */
> +		if (GRAPHICS_VERx100(xe) == 2001)
> +			xe->pat.n_entries = 28; /* Disable CLOS3 */
> +		else
> +			xe->pat.n_entries =
> ARRAY_SIZE(xe2_pat_table);
> +
>  		xe->pat.idx[XE_CACHE_NONE] = 3;
>  		xe->pat.idx[XE_CACHE_WT] = 15;
>  		xe->pat.idx[XE_CACHE_WB] = 2;
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules
> b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index a6b897030fde..c6d8941621c6 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -28,3 +28,4 @@
>  		GRAPHICS_VERSION(2004)
>  13011645652	GRAPHICS_VERSION(2004)
>  22019338487	MEDIA_VERSION(2000)
> +16023588340	GRAPHICS_VERSION(2001)



More information about the Intel-xe mailing list