[PATCH v3 1/2] drm/xe/bmg: implement Wa_16023588340
Thomas Hellström
thomas.hellstrom at linux.intel.com
Wed Jul 3 09:24:37 UTC 2024
Hi, Matt
On Tue, 2024-07-02 at 16:06 +0100, Matthew Auld wrote:
> This involves enabling l2 caching of host side memory access to VRAM
> through the CPU BAR. The main fallout here is with display since VRAM
> writes from CPU can now be cached in GPU l2, and display is never
> coherent with caches, so needs various manual flushing. In the case
> of
> fbc we disable it due to complications in getting this to work
> correctly (in a later patch).
What about user-space accesses to framebuffers?
/Thomas
>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Vinod Govindapillai <vinod.govindapillai at intel.com>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
> ---
> drivers/gpu/drm/xe/Makefile | 2 +
> drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 8 ++++
> drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++
> drivers/gpu/drm/xe/xe_device.c | 30 ++++++++++++
> drivers/gpu/drm/xe/xe_device.h | 1 +
> drivers/gpu/drm/xe/xe_gt.c | 54
> ++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_pat.c | 11 ++++-
> drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
> 9 files changed, 117 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/Makefile
> b/drivers/gpu/drm/xe/Makefile
> index b1e03bfe4a68..970c5c09e20a 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c
> $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>
> uses_generated_oob := \
> $(obj)/xe_ggtt.o \
> + $(obj)/xe_device.o \
> $(obj)/xe_gsc.o \
> $(obj)/xe_gt.o \
> $(obj)/xe_guc.o \
> $(obj)/xe_guc_ads.o \
> $(obj)/xe_guc_pc.o \
> $(obj)/xe_migrate.o \
> + $(obj)/xe_pat.o \
> $(obj)/xe_ring_ops.o \
> $(obj)/xe_vm.o \
> $(obj)/xe_wa.o \
> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> index 9e860c61f4b3..ccd0d87d438a 100644
> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> @@ -7,6 +7,8 @@
> #include "intel_display_types.h"
> #include "intel_dsb_buffer.h"
> #include "xe_bo.h"
> +#include "xe_device.h"
> +#include "xe_device_types.h"
> #include "xe_gt.h"
>
> u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
> @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct
> intel_dsb_buffer *dsb_buf)
>
> void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32
> idx, u32 val)
> {
> + struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
> +
> iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
> + xe_device_l2_flush(xe);
> }
>
> u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
> @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer
> *dsb_buf, u32 idx)
>
> void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32
> idx, u32 val, size_t size)
> {
> + struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
> +
> WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
>
> iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val,
> size);
> + xe_device_l2_flush(xe);
> }
>
> bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct
> intel_dsb_buffer *dsb_buf, size_t size)
> diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> index 423f367c7065..d7db44e79eaf 100644
> --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> @@ -10,6 +10,7 @@
> #include "intel_fb.h"
> #include "intel_fb_pin.h"
> #include "xe_bo.h"
> +#include "xe_device.h"
> #include "xe_ggtt.h"
> #include "xe_gt.h"
> #include "xe_pm.h"
> @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const
> struct intel_framebuffer *fb,
> if (ret)
> goto err_unpin;
>
> + /* Ensure DPT writes are flushed */
> + xe_device_l2_flush(xe);
> return vma;
>
> err_unpin:
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index d44564bad009..fd9d94174efb 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -80,6 +80,9 @@
> #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
> #define
> LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
>
> +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
> +#define CG_DIS_CNTLBUS REG_BIT(6)
> +
> #define CCS_AUX_INV XE_REG(0x4208)
>
> #define VD0_AUX_INV XE_REG(0x4218)
> @@ -372,6 +375,11 @@
>
> #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 +
> (i) * 8)
>
> +#define XE2_GLOBAL_INVAL XE_REG(0xb404)
> +
> +#define SCRATCH1LPFC XE_REG(0xb474)
> +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
> +
> #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
>
> #define XE2_TDF_CTRL XE_REG(0xb418)
> diff --git a/drivers/gpu/drm/xe/xe_device.c
> b/drivers/gpu/drm/xe/xe_device.c
> index cfda7cb5df2c..b0f79ef6bce1 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -54,6 +54,9 @@
> #include "xe_vm.h"
> #include "xe_vram.h"
> #include "xe_wait_user_fence.h"
> +#include "xe_wa.h"
> +
> +#include <generated/xe_wa_oob.h>
>
> static int xe_file_open(struct drm_device *dev, struct drm_file
> *file)
> {
> @@ -779,6 +782,11 @@ void xe_device_td_flush(struct xe_device *xe)
> if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
> return;
>
> + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
> + xe_device_l2_flush(xe);
> + return;
> + }
> +
> for_each_gt(gt, xe, id) {
> if (xe_gt_is_media_type(gt))
> continue;
> @@ -802,6 +810,28 @@ void xe_device_td_flush(struct xe_device *xe)
> }
> }
>
> +void xe_device_l2_flush(struct xe_device *xe)
> +{
> + struct xe_gt *gt;
> + int err;
> +
> + gt = xe_root_mmio_gt(xe);
> +
> + if (!XE_WA(gt, 16023588340))
> + return;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (err)
> + return;
> +
> + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
> +
> + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150,
> NULL, true))
> + xe_gt_err_once(gt, "Global invalidation timeout\n");
> +
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
> {
> return xe_device_has_flat_ccs(xe) ?
> diff --git a/drivers/gpu/drm/xe/xe_device.h
> b/drivers/gpu/drm/xe/xe_device.h
> index bb07f5669dbb..0a2a3e7fd402 100644
> --- a/drivers/gpu/drm/xe/xe_device.h
> +++ b/drivers/gpu/drm/xe/xe_device.h
> @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device
> *xe, u64 address);
> u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64
> address);
>
> void xe_device_td_flush(struct xe_device *xe);
> +void xe_device_l2_flush(struct xe_device *xe);
>
> static inline bool xe_device_wedged(struct xe_device *xe)
> {
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 29e8ea94d05e..006d3594ba55 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -11,6 +11,8 @@
> #include <drm/xe_drm.h>
> #include <generated/xe_wa_oob.h>
>
> +#include <generated/xe_wa_oob.h>
> +
> #include "instructions/xe_gfxpipe_commands.h"
> #include "instructions/xe_mi_commands.h"
> #include "regs/xe_gt_regs.h"
> @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
> gt->uc.guc.submission_state.enabled = false;
> }
>
> +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
> +{
> + u32 reg;
> + int err;
> +
> + if (!XE_WA(gt, 16023588340))
> + return;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (WARN_ON(err))
> + return;
> +
> + if (!xe_gt_is_media_type(gt)) {
> + xe_mmio_write32(gt, SCRATCH1LPFC,
> EN_L3_RW_CCS_CACHE_FLUSH);
> + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
> + reg |= CG_DIS_CNTLBUS;
> + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
> + }
> +
> + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
> +{
> + u32 reg;
> + int err;
> +
> + if (!XE_WA(gt, 16023588340))
> + return;
> +
> + if (xe_gt_is_media_type(gt))
> + return;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (WARN_ON(err))
> + return;
> +
> + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
> + reg &= ~CG_DIS_CNTLBUS;
> + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
> +
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +}
> +
> /**
> * xe_gt_remove() - Clean up the GT structures before driver removal
> * @gt: the GT object
> @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
>
> for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
> xe_hw_fence_irq_finish(&gt->fence_irq[i]);
> +
> + xe_gt_disable_host_l2_vram(gt);
> }
>
> static void gt_reset_worker(struct work_struct *w);
> @@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
>
> xe_gt_mcr_init_early(gt);
> xe_pat_init(gt);
> + xe_gt_enable_host_l2_vram(gt);
>
> err = xe_uc_init(&gt->uc);
> if (err)
> @@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
>
> xe_pat_init(gt);
>
> + xe_gt_enable_host_l2_vram(gt);
> +
> xe_gt_mcr_set_implicit_defaults(gt);
> xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
>
> @@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
>
> xe_gt_idle_disable_pg(gt);
>
> + xe_gt_disable_host_l2_vram(gt);
> +
> XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt),
> XE_FORCEWAKE_ALL));
> xe_gt_dbg(gt, "suspended\n");
>
> diff --git a/drivers/gpu/drm/xe/xe_pat.c
> b/drivers/gpu/drm/xe/xe_pat.c
> index 4ee32ee1cc88..722278cc23fc 100644
> --- a/drivers/gpu/drm/xe/xe_pat.c
> +++ b/drivers/gpu/drm/xe/xe_pat.c
> @@ -7,6 +7,8 @@
>
> #include <drm/xe_drm.h>
>
> +#include <generated/xe_wa_oob.h>
> +
> #include "regs/xe_reg_defs.h"
> #include "xe_assert.h"
> #include "xe_device.h"
> @@ -15,6 +17,7 @@
> #include "xe_gt_mcr.h"
> #include "xe_mmio.h"
> #include "xe_sriov.h"
> +#include "xe_wa.h"
>
> #define _PAT_ATS 0x47fc
> #define
> _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
> @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
> if (GRAPHICS_VER(xe) == 20) {
> xe->pat.ops = &xe2_pat_ops;
> xe->pat.table = xe2_pat_table;
> - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
> +
> + /* Wa_16023588340. XXX: Should use XE_WA */
> + if (GRAPHICS_VERx100(xe) == 2001)
> + xe->pat.n_entries = 28; /* Disable CLOS3 */
> + else
> + xe->pat.n_entries =
> ARRAY_SIZE(xe2_pat_table);
> +
> xe->pat.idx[XE_CACHE_NONE] = 3;
> xe->pat.idx[XE_CACHE_WT] = 15;
> xe->pat.idx[XE_CACHE_WB] = 2;
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules
> b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index a6b897030fde..c6d8941621c6 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -28,3 +28,4 @@
> GRAPHICS_VERSION(2004)
> 13011645652 GRAPHICS_VERSION(2004)
> 22019338487 MEDIA_VERSION(2000)
> +16023588340 GRAPHICS_VERSION(2001)
More information about the Intel-gfx
mailing list