[Intel-gfx] [PATCH v4 2/4] drm/i915: Add WABB blit for Wa_16018031267 / Wa_16018063123

Andi Shyti andi.shyti at linux.intel.com
Tue Oct 24 11:37:32 UTC 2023


Hi Andrzej,

On Mon, Oct 23, 2023 at 10:21:46PM +0200, Andrzej Hajda wrote:
> Apply WABB blit for Wa_16018031267 / Wa_16018063123.
> 
> v3: drop unused enum definition
> v4: move selftest to separate patch, use wa only on BCS0.
> 
> Co-developed-by: Nirmoy Das <nirmoy.das at intel.com>
> Co-developed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>

Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
Signed-off-by: Jonathan Cavitt <jonathan.cavitt at intel.com>

> Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>

Reviewed-by: Andi Shyti <andi.shyti at linux.intel.com>

Two little not binding notes below.

...

> -static u32 *context_indirect_bb(const struct intel_context *ce)
> +/*
> + * per_ctx below determines which WABB section is used.
> + * When true, the function returns the location of the
> + * PER_CTX_BB.  When false, the function returns the
> + * location of the INDIRECT_CTX.
> + */
> +static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
>  {
>  	void *ptr;
>  
> @@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce)
>  	ptr = ce->lrc_reg_state;
>  	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
>  	ptr += context_wa_bb_offset(ce);
> +	ptr += per_ctx ? PAGE_SIZE : 0;

I'm not a big fan of bools here... I'd rather add as parameter
an offset value and just do:

	ptr += offset;

Your choice, not strong on this.

>  
>  	return ptr;
>  }
> @@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
>  
>  	if (GRAPHICS_VER(engine->i915) >= 12) {
>  		ce->wa_bb_page = context_size / PAGE_SIZE;
> -		context_size += PAGE_SIZE;
> +		/* INDIRECT_CTX and PER_CTX_BB need separate pages. */
> +		context_size += PAGE_SIZE * 2;
>  	}
>  
>  	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
> @@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
>  	return gen12_emit_aux_table_inv(ce->engine, cs);
>  }
>  
> +static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
> +{
> +	struct intel_gt *gt = ce->engine->gt;
> +	int mocs = gt->mocs.uc_index << 1;
> +
> +	/**
> +	 * Wa_16018031267 / Wa_16018063123 requires that SW forces the
> +	 * main copy engine arbitration into round robin mode.  We
> +	 * additionally need to submit the following WABB blt command
> +	 * to produce 4 subblits with each subblit generating 0 byte
> +	 * write requests as WABB:
> +	 *
> +	 * XY_FASTCOLOR_BLT
> +	 *  BG0    -> 5100000E
> +	 *  BG1    -> 0000003F (Dest pitch)
> +	 *  BG2    -> 00000000 (X1, Y1) = (0, 0)
> +	 *  BG3    -> 00040001 (X2, Y2) = (1, 4)
> +	 *  BG4    -> scratch
> +	 *  BG5    -> scratch
> +	 *  BG6-12 -> 00000000
> +	 *  BG13   -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
> +	 *  BG14   -> 00000010 (Qpitch = 4)
> +	 *  BG15   -> 00000000
> +	 */
> +	*cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
> +	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
> +	*cs++ = 0;
> +	*cs++ = 4 << 16 | 1;
> +	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd));
> +	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd));
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0x20004004;
> +	*cs++ = 0x10;
> +	*cs++ = 0;
> +
> +	return cs;
> +}
> +
> +static u32 *
> +xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
> +{
> +	/* Wa_16018031267, Wa_16018063123 */
> +	if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
> +		cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
> +
> +	return cs;
> +}

Is this function necessary? Can't we just add in
xehp_emit_fastcolor_blt_wabb()

	if (!NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
		return cs;

Andi


More information about the Intel-gfx mailing list