[Mesa-dev] [PATCH 9/9] ir3/compiler: Handle the new ir3 intrinsics for SSBO

Eduardo Lima Mitev elima at igalia.com
Mon Feb 18 10:03:04 UTC 2019


On 2/13/19 10:29 PM, Eduardo Lima Mitev wrote:
> These intrinsics have the offset in dwords already computed in the last
> source, so the change here is basically using that instead of emitting
> the ir3_SHR to divide the byte-offset by 4.
> 
> The improvement in shader stats is dramatic, of up to ~15% in
> instruction count in some cases. Tested on a5xx.
> 
> shader-db is unfortunately not very useful here because shaders that use
> SSBO require GLSL versions that are not supported by freedreno yet.
> 
> For examples, most Khronos CTS tests under 'dEQP-GLES31.functional.ssbo.*'
> are helped.
> 
> A random case:
> 
> dEQP-GLES31.functional.ssbo.layout.2_level_array.packed.row_major_mat3x2
> 
> with current master:
> 
> ; CL prog 14/1: 1252 instructions, 0 half, 48 full
> ; 8 const, 8 constlen
> ; 61 (ss), 43 (sy)
> 
> with the SSBO dword-offset moved to NIR:
> 
> ; CL prog 14/1: 1053 instructions, 0 half, 45 full
> ; 7 const, 7 constlen
> ; 34 (ss), 73 (sy)
> 
> The SHR previously emitted for every single SSBO instruction disappears
> in most cases, and the dword-offset ends up embedded in the STGB
> instruction as immediate in many cases as well.
> 
> No regressions observed with relevant CTS and piglit tests.
> ---
>  src/freedreno/ir3/ir3_compiler_nir.c | 71 +++++++++++++++++++---------
>  1 file changed, 48 insertions(+), 23 deletions(-)
> 
> diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
> index 0e141f03181..c494913f254 100644
> --- a/src/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/freedreno/ir3/ir3_compiler_nir.c
> @@ -760,7 +760,12 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
>  		offset,
>  		create_immed(b, 0),
>  	}, 2);
> -	src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
> +
> +	/* intrinsic->src[2] holds the dword-offset as placed by
> +	 * 'ir3_nir_lower_io_offsets' pass.
> +	 */
> +	assert(intr->intrinsic == nir_intrinsic_load_ssbo_ir3);

This should be debug_assert(). Fixed locally.

> +	src1 = ir3_get_src(ctx, &intr->src[2])[0];
>  
>  	ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
>  			src0, 0, src1, 0);
> @@ -798,7 +803,13 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>  	 * nir already *= 4:
>  	 */
>  	src0 = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
> -	src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
> +
> +	/* intrinsic->src[3] holds the dword-offset as placed by
> +	 * 'ir3_nir_lower_io_offsets' pass.
> +	 */
> +	assert(intr->intrinsic == nir_intrinsic_store_ssbo_ir3);

Same here.

> +	src1 = ir3_get_src(ctx, &intr->src[3])[0];
> +
>  	src2 = ir3_create_collect(ctx, (struct ir3_instruction*[]){
>  		offset,
>  		create_immed(b, 0),
> @@ -869,40 +880,50 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>  	 * Note that nir already multiplies the offset by four
>  	 */
>  	src0 = ir3_get_src(ctx, &intr->src[2])[0];
> -	src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
> +
> +	/* intrinsic->src[3] holds the dword-offset as placed by
> +	 * 'ir3_nir_lower_io_offsets' pass. It doesn't handle
> +	 * 'atomic_comp_swap', though, because that intrinsic already used
> +	 * all its 4 sources.
> +	 */
> +	if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
> +		src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
> +	else
> +		src1 = ir3_get_src(ctx, &intr->src[3])[0];
> +
>  	src2 = ir3_create_collect(ctx, (struct ir3_instruction*[]){
>  		offset,
>  		create_immed(b, 0),
>  	}, 2);
>  
>  	switch (intr->intrinsic) {
> -	case nir_intrinsic_ssbo_atomic_add:
> +	case nir_intrinsic_ssbo_atomic_add_ir3:
>  		atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_imin:
> +	case nir_intrinsic_ssbo_atomic_imin_ir3:
>  		atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		type = TYPE_S32;
>  		break;
> -	case nir_intrinsic_ssbo_atomic_umin:
> +	case nir_intrinsic_ssbo_atomic_umin_ir3:
>  		atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_imax:
> +	case nir_intrinsic_ssbo_atomic_imax_ir3:
>  		atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		type = TYPE_S32;
>  		break;
> -	case nir_intrinsic_ssbo_atomic_umax:
> +	case nir_intrinsic_ssbo_atomic_umax_ir3:
>  		atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_and:
> +	case nir_intrinsic_ssbo_atomic_and_ir3:
>  		atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_or:
> +	case nir_intrinsic_ssbo_atomic_or_ir3:
>  		atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_xor:
> +	case nir_intrinsic_ssbo_atomic_xor_ir3:
>  		atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_exchange:
> +	case nir_intrinsic_ssbo_atomic_exchange_ir3:
>  		atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
>  		break;
>  	case nir_intrinsic_ssbo_atomic_comp_swap:
> @@ -1639,24 +1660,28 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>  			}
>  		}
>  		break;
> -	case nir_intrinsic_load_ssbo:
> +		/* All SSBO intrinsics (except atomic_comp_swap) should have been
> +		 * lowered by 'lower_io_offsets' pass and replaced by an ir3-specifc
> +		 * version that adds the dword-offset in the last component.
> +		 */
> +	case nir_intrinsic_load_ssbo_ir3:
>  		emit_intrinsic_load_ssbo(ctx, intr, dst);
>  		break;
> -	case nir_intrinsic_store_ssbo:
> +	case nir_intrinsic_store_ssbo_ir3:
>  		emit_intrinsic_store_ssbo(ctx, intr);
>  		break;
>  	case nir_intrinsic_get_buffer_size:
>  		emit_intrinsic_ssbo_size(ctx, intr, dst);
>  		break;
> -	case nir_intrinsic_ssbo_atomic_add:
> -	case nir_intrinsic_ssbo_atomic_imin:
> -	case nir_intrinsic_ssbo_atomic_umin:
> -	case nir_intrinsic_ssbo_atomic_imax:
> -	case nir_intrinsic_ssbo_atomic_umax:
> -	case nir_intrinsic_ssbo_atomic_and:
> -	case nir_intrinsic_ssbo_atomic_or:
> -	case nir_intrinsic_ssbo_atomic_xor:
> -	case nir_intrinsic_ssbo_atomic_exchange:
> +	case nir_intrinsic_ssbo_atomic_add_ir3:
> +	case nir_intrinsic_ssbo_atomic_imin_ir3:
> +	case nir_intrinsic_ssbo_atomic_umin_ir3:
> +	case nir_intrinsic_ssbo_atomic_imax_ir3:
> +	case nir_intrinsic_ssbo_atomic_umax_ir3:
> +	case nir_intrinsic_ssbo_atomic_and_ir3:
> +	case nir_intrinsic_ssbo_atomic_or_ir3:
> +	case nir_intrinsic_ssbo_atomic_xor_ir3:
> +	case nir_intrinsic_ssbo_atomic_exchange_ir3:
>  	case nir_intrinsic_ssbo_atomic_comp_swap:
>  		dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr);
>  		break;
> 


More information about the mesa-dev mailing list