[Mesa-dev] [PATCH 4/4] radeonsi: Use FP16 shader export format when necessary / possible.

Tom Stellard thomas.stellard at amd.com
Fri Aug 24 06:36:42 PDT 2012


On Fri, Aug 24, 2012 at 02:53:01PM +0200, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
> 
> Fixes piglit fbo-blending-formats.
> 
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> ---
>  src/gallium/drivers/radeon/SIInstructions.td   |    4 +-
>  src/gallium/drivers/radeon/SIIntrinsics.td     |    1 +
>  src/gallium/drivers/radeonsi/radeonsi_pipe.h   |    3 +-
>  src/gallium/drivers/radeonsi/radeonsi_shader.c |   51 ++++++++++++++----
>  src/gallium/drivers/radeonsi/si_state.c        |   69 +++++++++++++++++++++++-
>  src/gallium/drivers/radeonsi/si_state_draw.c   |    4 --
>  6 files changed, 114 insertions(+), 18 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
> index f09d604..3047321 100644
> --- a/src/gallium/drivers/radeon/SIInstructions.td
> +++ b/src/gallium/drivers/radeon/SIInstructions.td
> @@ -726,7 +726,9 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
>  ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
>  ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
>  ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
> -////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", []>;
> +defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
> + [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
> +>;
>  ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
>  ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
>  def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
> diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
> index 6eadc94..b9544f1 100644
> --- a/src/gallium/drivers/radeon/SIIntrinsics.td
> +++ b/src/gallium/drivers/radeon/SIIntrinsics.td
> @@ -14,6 +14,7 @@
>  
>  let TargetPrefix = "SI", isTarget = 1 in {
>  
> +  def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
>    def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
>    /* XXX: We may need a seperate intrinsic here for loading integer values */
>    def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> index 989bb49..099b509 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> @@ -134,7 +134,8 @@ struct r600_context {
>  	unsigned			saved_render_cond_mode;
>  	/* shader information */
>  	unsigned			sprite_coord_enable;
> -	boolean				export_16bpc;
> +	unsigned			export_16bpc;
> +	unsigned			spi_shader_col_format;
>  	unsigned			alpha_ref;
>  	boolean				alpha_ref_dirty;
>  	struct r600_textures_info	vs_samplers;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> index fd614dd..98866c4 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> @@ -390,13 +390,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
>  	unsigned compressed = 0;
>  	unsigned chan;
>  
> -	for (chan = 0; chan < 4; chan++ ) {
> -		LLVMValueRef out_ptr =
> -			si_shader_ctx->radeon_bld.soa.outputs[index][chan];
> -		/* +5 because the first output value will be
> -		 * the 6th argument to the intrinsic. */
> -		args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
> -					       out_ptr, "");
> +	if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> +		int cbuf = target - V_008DFC_SQ_EXP_MRT;
> +
> +		if (cbuf >= 0 && cbuf < 8) {
> +			struct r600_context *rctx = si_shader_ctx->rctx;
> +			compressed = (rctx->export_16bpc >> cbuf) & 0x1;
> +		}
> +	}
> +
> +	if (compressed) {
> +		/* Pixel shader needs to pack output values before export */
> +		for (chan = 0; chan < 2; chan++ ) {
> +			LLVMValueRef *out_ptr =
> +				si_shader_ctx->radeon_bld.soa.outputs[index];
> +			args[0] = LLVMBuildLoad(base->gallivm->builder,
> +						out_ptr[2 * chan], "");
> +			args[1] = LLVMBuildLoad(base->gallivm->builder,
> +						out_ptr[2 * chan + 1], "");
> +			args[chan + 5] =
> +				build_intrinsic(base->gallivm->builder,
> +						"llvm.SI.packf16",
> +						LLVMInt32TypeInContext(base->gallivm->context),
> +						args, 2,
> +						LLVMReadNoneAttribute);
> +			args[chan + 7] = args[chan + 5];
> +		}
> +
> +		/* Set COMPR flag */
> +		args[4] = uint->one;
> +	} else {
> +		for (chan = 0; chan < 4; chan++ ) {
> +			LLVMValueRef out_ptr =
> +				si_shader_ctx->radeon_bld.soa.outputs[index][chan];
> +			/* +5 because the first output value will be
> +			 * the 6th argument to the intrinsic. */
> +			args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
> +						       out_ptr, "");
> +		}
> +
> +		/* Clear COMPR flag */
> +		args[4] = uint->zero;
>  	}
>  
>  	/* XXX: This controls which components of the output
> @@ -415,9 +449,6 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
>  	/* Specify the target we are exporting */
>  	args[3] = lp_build_const_int32(base->gallivm, target);
>  
> -	/* Set COMPR flag */
> -	args[4] = uint->zero;
> -
>  	/* XXX: We probably need to keep track of the output
>  	 * values, so we know what we are passing to the next
>  	 * stage. */
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 36ac6bf..5f7f415 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -996,6 +996,53 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
>  	}
>  }
>  
> +/* Returns the size in bits of the widest component of a CB format */
> +static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
> +{
> +	switch(colorformat) {
> +	case V_028C70_COLOR_4_4_4_4:
> +		return 4;
> +
> +	case V_028C70_COLOR_1_5_5_5:
> +	case V_028C70_COLOR_5_5_5_1:
> +		return 5;
> +
> +	case V_028C70_COLOR_5_6_5:
> +		return 6;
> +
> +	case V_028C70_COLOR_8:
> +	case V_028C70_COLOR_8_8:
> +	case V_028C70_COLOR_8_8_8_8:
> +		return 8;
> +
> +	case V_028C70_COLOR_10_10_10_2:
> +	case V_028C70_COLOR_2_10_10_10:
> +		return 10;
> +
> +	case V_028C70_COLOR_10_11_11:
> +	case V_028C70_COLOR_11_11_10:
> +		return 11;
> +
> +	case V_028C70_COLOR_16:
> +	case V_028C70_COLOR_16_16:
> +	case V_028C70_COLOR_16_16_16_16:
> +		return 16;
> +
> +	case V_028C70_COLOR_8_24:
> +	case V_028C70_COLOR_24_8:
> +		return 24;
> +
> +	case V_028C70_COLOR_32:
> +	case V_028C70_COLOR_32_32:
> +	case V_028C70_COLOR_32_32_32_32:
> +	case V_028C70_COLOR_X24_8_32_FLOAT:
> +		return 32;
> +	}
> +
> +	assert(!"Unknown maximum component size");
> +	return 0;
> +}
> +
>  static uint32_t si_translate_dbformat(enum pipe_format format)
>  {
>  	switch (format) {
> @@ -1409,6 +1456,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
>  	const struct util_format_description *desc;
>  	int i;
>  	unsigned blend_clamp = 0, blend_bypass = 0;
> +	unsigned max_comp_size;
>  
>  	surf = (struct r600_surface *)state->cbufs[cb];
>  	rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
> @@ -1549,6 +1597,17 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
>  	}
>  	si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
>  	si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
> +
> +	/* Determine pixel shader export format */
> +	max_comp_size = si_colorformat_max_comp_size(format);
> +	if (ntype == V_028C70_NUMBER_SRGB ||
> +	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
> +	     max_comp_size <= 10) ||
> +	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
> +		rctx->export_16bpc |= 1 << cb;
> +		rctx->spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * cb);
> +	} else
> +		rctx->spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * cb);
>  }
>  
>  static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
> @@ -1667,9 +1726,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>  
>  	/* build states */
>  	rctx->have_depth_fb = 0;
> +	rctx->export_16bpc = 0;
> +	rctx->spi_shader_col_format = 0;
>  	for (int i = 0; i < state->nr_cbufs; i++) {
>  		si_cb(rctx, pm4, state, i);
>  	}
> +	assert(!(rctx->export_16bpc & ~0xff));
>  	si_db(rctx, pm4, state);
>  
>  	shader_mask = 0;
> @@ -1706,6 +1768,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>  	si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
>  	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
>  	si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask);
> +	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
> +		       rctx->spi_shader_col_format);
>  	si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000);
>  
>  	si_pm4_set_state(rctx, framebuffer, pm4);
> @@ -1727,9 +1791,10 @@ static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
>  	if (sel->type == PIPE_SHADER_FRAGMENT) {
>  		if (sel->fs_write_all)
>  			key |= rctx->framebuffer.nr_cbufs;
> +		key |= rctx->export_16bpc << 4;
>  		/*if (rctx->queued.named.rasterizer)
> -			  key |= rctx->queued.named.rasterizer->flatshade << 4;*/
> -		/*key |== rctx->two_side << 5;*/
> +			  key |= rctx->queued.named.rasterizer->flatshade << 12;*/
> +		/*key |== rctx->two_side << 13;*/
>  	}
>  
>  	return key;
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 95821dc..5f8e211 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -186,10 +186,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
>  	/* XXX: Depends on Z buffer format? */
>  	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0);
>  
> -	/* XXX: Depends on color buffer format? */
> -	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
> -		       S_028714_COL0_EXPORT_FORMAT(V_028714_SPI_SHADER_32_ABGR));
> -
>  	va = r600_resource_va(ctx->screen, (void *)shader->bo);
>  	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
>  	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
> -- 
> 1.7.10.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list