[Mesa-stable] [PATCH 1/5] radeonsi: workaround for gather4 on integer cube maps

Wed Sep 13 17:04:30 UTC 2017

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

This is the same workaround that radv already applied in commit
3ece76f03dc0 ("radv/ac: gather4 cube workaround integer").

Fixes dEQP-GLES31.functional.texture.gather.basic.cube.rgba8i.* and
dEQP-GLES31.functional.texture.gather.basic.cube.rgba8ui.*

Cc: mesa-stable at lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 106 ++++++++++++++++++++--
 1 file changed, 100 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 1e44b68b860..a09ebed23d2 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1667,35 +1667,80 @@ static void tex_fetch_args(
 }
 
 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
  * incorrectly forces nearest filtering if the texture format is integer.
  * The only effect it has on Gather4, which always returns 4 texels for
  * bilinear filtering, is that the final coordinates are off by 0.5 of
  * the texel size.
  *
  * The workaround is to subtract 0.5 from the unnormalized coordinates,
  * or (0.5 / size) from the normalized coordinates.
+ *
+ * However, cube textures with 8_8_8_8 data formats require a different
+ * workaround of overriding the num format to USCALED/SSCALED. This would lose
+ * precision in 32-bit data formats, so it needs to be applied dynamically at
+ * runtime. In this case, return an i1 value that indicates whether the
+ * descriptor was overridden (and hence a fixup of the sampler result is needed).
  */
-static void si_lower_gather4_integer(struct si_shader_context *ctx,
-				     struct ac_image_args *args,
-				     unsigned target)
+static LLVMValueRef
+si_lower_gather4_integer(struct si_shader_context *ctx,
+			 struct ac_image_args *args,
+			 unsigned target,
+			 enum tgsi_return_type return_type)
 {
 	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef coord = args->addr;
 	LLVMValueRef half_texel[2];
 	/* Texture coordinates start after:
 	 *   {offset, bias, z-compare, derivatives}
 	 * Only the offset and z-compare can occur here.
 	 */
 	unsigned coord_vgpr_index = (int)args->offset + (int)args->compare;
 	int c;
 
+	assert(return_type == TGSI_RETURN_TYPE_SINT ||
+	       return_type == TGSI_RETURN_TYPE_UINT);
+
+	if (target == TGSI_TEXTURE_CUBE ||
+	    target == TGSI_TEXTURE_CUBE_ARRAY) {
+		LLVMValueRef formats;
+		LLVMValueRef data_format;
+		LLVMValueRef wa_formats;
+		LLVMValueRef wa;
+
+		formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
+
+		data_format = LLVMBuildLShr(builder, formats,
+					    LLVMConstInt(ctx->i32, 20, false), "");
+		data_format = LLVMBuildAnd(builder, data_format,
+					   LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
+		wa = LLVMBuildICmp(builder, LLVMIntEQ, data_format,
+				   LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
+				   "");
+
+		uint32_t wa_num_format =
+			return_type == TGSI_RETURN_TYPE_UINT ?
+			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) :
+			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED);
+		wa_formats = LLVMBuildAnd(builder, formats,
+					  LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false),
+					  "");
+		wa_formats = LLVMBuildOr(builder, wa_formats,
+					LLVMConstInt(ctx->i32, wa_num_format, false), "");
+
+		formats = LLVMBuildSelect(builder, wa, wa_formats, formats, "");
+		args->resource = LLVMBuildInsertElement(
+			builder, args->resource, formats, ctx->i32_1, "");
+
+		return wa;
+	}
+
 	if (target == TGSI_TEXTURE_RECT ||
 	    target == TGSI_TEXTURE_SHADOWRECT) {
 		half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
 	} else {
 		struct tgsi_full_instruction txq_inst = {};
 		struct lp_build_emit_data txq_emit_data = {};
 
 		/* Query the texture size. */
 		txq_inst.Texture.Texture = target;
 		txq_emit_data.inst = &txq_inst;
@@ -1724,20 +1769,56 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
 		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
 
 		tmp = LLVMBuildExtractElement(builder, coord, index, "");
 		tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
 		tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
 		tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
 		coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
 	}
 
 	args->addr = coord;
+
+	return NULL;
+}
+
+/* The second half of the cube texture 8_8_8_8 integer workaround: convert the
+ * float gather result back to integer where the descriptor was overridden.
+ */
+static LLVMValueRef
+si_fix_gather4_integer_result(struct si_shader_context *ctx,
+			   LLVMValueRef result,
+			   enum tgsi_return_type return_type,
+			   LLVMValueRef wa)
+{
+	LLVMBuilderRef builder = ctx->gallivm.builder;
+
+	assert(return_type == TGSI_RETURN_TYPE_SINT ||
+	       return_type == TGSI_RETURN_TYPE_UINT);
+
+	for (unsigned chan = 0; chan < 4; ++chan) {
+		LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
+		LLVMValueRef value;
+		LLVMValueRef wa_value;
+
+		value = LLVMBuildExtractElement(builder, result, chanv, "");
+
+		if (return_type == TGSI_RETURN_TYPE_UINT)
+			wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
+		else
+			wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
+		wa_value = LLVMBuildBitCast(builder, wa_value, ctx->f32, "");
+		value = LLVMBuildSelect(builder, wa, wa_value, value, "");
+
+		result = LLVMBuildInsertElement(builder, result, value, chanv, "");
+	}
+
+	return result;
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 				struct lp_build_tgsi_context *bld_base,
 				struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	struct ac_image_args args;
 	unsigned opcode = inst->Instruction.Opcode;
@@ -1798,31 +1879,44 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 	case TGSI_OPCODE_TG4:
 		args.opcode = ac_image_gather4;
 		args.level_zero = true;
 		break;
 	default:
 		assert(0);
 		return;
 	}
 
 	/* The hardware needs special lowering for Gather4 with integer formats. */
+	LLVMValueRef gather4_int_result_workaround = NULL;
+
 	if (ctx->screen->b.chip_class <= VI &&
 	    opcode == TGSI_OPCODE_TG4) {
 		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
 
 		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
-		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT)
-			si_lower_gather4_integer(ctx, &args, target);
+		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
+			gather4_int_result_workaround =
+				si_lower_gather4_integer(ctx, &args, target,
+							 inst->Texture.ReturnType);
+		}
 	}
 
-	emit_data->output[emit_data->chan] =
+	LLVMValueRef result =
 		ac_build_image_opcode(&ctx->ac, &args);
+
+	if (gather4_int_result_workaround) {
+		result = si_fix_gather4_integer_result(ctx, result,
+						       inst->Texture.ReturnType,
+						       gather4_int_result_workaround);
+	}
+
+	emit_data->output[emit_data->chan] = result;
 }
 
 static void si_llvm_emit_txqs(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
-- 
2.11.0