[PATCH] drm/i915: Remove unnecessary memory quiescing for aux inval
Tapani Pälli
tapani.palli at intel.com
Wed Sep 20 12:36:52 UTC 2023
I first tested this against the tests that were failing for Mesa, and it
fixes all of the regressed cases (TGL LP). I also did a full run of all
KHR-GLES31* and KHR-GLES32* test groups in the Khronos CTS suite; no
regressions were observed.
Tested-by: Tapani Pälli <tapani.palli at intel.com>
On 20.9.2023 14.11, Nirmoy Das wrote:
> i915 already quiesces memory before signaling the
> breadcrumb, so remove the extra memory quiescing for aux
> invalidation, which can cause unnecessary side effects.
>
> Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation")
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Andi Shyti <andi.shyti at linux.intel.com>
> Cc: <stable at vger.kernel.org> # v5.8+
> Cc: Andrzej Hajda <andrzej.hajda at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Tejas Upadhyay <tejas.upadhyay at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Prathap Kumar Valsan <prathap.kumar.valsan at intel.com>
> Cc: Tapani Pälli <tapani.palli at intel.com>
> Cc: Mark Janes <mark.janes at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
> ---
> drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 50 ++++++++++++------------
> 1 file changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 0143445dba83..5001670046a0 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -248,11 +248,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
> {
> struct intel_engine_cs *engine = rq->engine;
>
> - /*
> - * On Aux CCS platforms the invalidation of the Aux
> - * table requires quiescing memory traffic beforehand
> - */
> - if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
> + if (mode & EMIT_FLUSH) {
> u32 bit_group_0 = 0;
> u32 bit_group_1 = 0;
> int err;
> @@ -264,13 +260,6 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>
> bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
>
> - /*
> - * When required, in MTL and beyond platforms we
> - * need to set the CCS_FLUSH bit in the pipe control
> - */
> - if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> - bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> -
> bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
> bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
> bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> @@ -800,14 +789,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
> {
> struct drm_i915_private *i915 = rq->i915;
> struct intel_gt *gt = rq->engine->gt;
> - u32 flags = (PIPE_CONTROL_CS_STALL |
> - PIPE_CONTROL_TLB_INVALIDATE |
> - PIPE_CONTROL_TILE_CACHE_FLUSH |
> - PIPE_CONTROL_FLUSH_L3 |
> - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> - PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> - PIPE_CONTROL_DC_FLUSH_ENABLE |
> - PIPE_CONTROL_FLUSH_ENABLE);
> + u32 bit_group_0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> + u32 bit_group_1 = (PIPE_CONTROL_CS_STALL |
> + PIPE_CONTROL_TLB_INVALIDATE |
> + PIPE_CONTROL_TILE_CACHE_FLUSH |
> + PIPE_CONTROL_FLUSH_L3 |
> + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> + PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> + PIPE_CONTROL_DC_FLUSH_ENABLE |
> + PIPE_CONTROL_FLUSH_ENABLE);
>
> /* Wa_14016712196 */
> if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
> @@ -817,14 +807,26 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>
> if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
> /* Wa_1409600907 */
> - flags |= PIPE_CONTROL_DEPTH_STALL;
> + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
>
> if (!HAS_3D_PIPELINE(rq->i915))
> - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> else if (rq->engine->class == COMPUTE_CLASS)
> - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +
> + /*
> + * On Aux CCS platforms the invalidation of the Aux
> + * table requires quiescing memory traffic beforehand.
> + * gen12_emit_fini_breadcrumb_rcs() does this for us as
> + * all memory traffic has to be completed before we signal
> + * the breadcrumb. In MTL and beyond platforms, we need to also
> + * add CCS_FLUSH bit in the pipe control for that purpose.
> + */
> +
> + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
>
> - cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
> + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 0);
>
> /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
> cs = gen12_emit_ggtt_write_rcs(cs,
More information about the dri-devel
mailing list