[Mesa-dev] [PATCH 1/2] radeonsi: implement ARB_texture_gather and Gather functions from GLSL 4.00

Marek Olšák maraeo at gmail.com
Fri Jun 6 14:58:23 PDT 2014


From: Marek Olšák <marek.olsak at amd.com>

All ARB_texture_gather and gather-related ARB_gpu_shader5 piglit tests pass.
---
 docs/GL3.txt                             |   2 +-
 docs/relnotes/10.3.html                  |   1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  12 ++-
 src/gallium/drivers/radeonsi/si_shader.c | 123 +++++++++++++++++++++++++++++--
 4 files changed, 125 insertions(+), 13 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index c98dd9f..d505ffd 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -119,7 +119,7 @@ GL 4.0:
   GL_ARB_tessellation_shader                           not started
   GL_ARB_texture_buffer_object_rgb32                   DONE (i965, nvc0, r600, radeonsi, softpipe)
   GL_ARB_texture_cube_map_array                        DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
-  GL_ARB_texture_gather                                DONE (i965, nv50, nvc0)
+  GL_ARB_texture_gather                                DONE (i965, nv50, nvc0, radeonsi)
   GL_ARB_transform_feedback2                           DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_transform_feedback3                           DONE (i965, nv50, nvc0, r600, radeonsi)
 
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index 0c8114b..f9a8114 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_sample_shading on radeonsi</li>
 <li>GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi</li>
 <li>GL_ARB_texture_cube_map_array on radeonsi</li>
+<li>GL_ARB_texture_gather on radeonsi</li>
 </ul>
 
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4b96f20..a3e1846 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -234,6 +234,11 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 		return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
 
+	case PIPE_CAP_TEXTURE_GATHER_SM5:
+		return HAVE_LLVM >= 0x0305;
+	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+		return HAVE_LLVM >= 0x0305 ? 4 : 0;
+
 	/* Unsupported features. */
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -242,8 +247,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
-	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-	case PIPE_CAP_TEXTURE_GATHER_SM5:
 	case PIPE_CAP_TGSI_TEXCOORD:
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -294,11 +297,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
  	case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
 	case PIPE_CAP_MIN_TEXEL_OFFSET:
-		return -8;
+		return -32;
 
  	case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
 	case PIPE_CAP_MAX_TEXEL_OFFSET:
-		return 7;
+		return 31;
+
 	case PIPE_CAP_ENDIANNESS:
 		return PIPE_ENDIAN_LITTLE;
 	}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c540414..d7e588d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1538,6 +1538,17 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 				struct lp_build_tgsi_context * bld_base,
 				struct lp_build_emit_data * emit_data);
 
+static bool tgsi_is_shadow_sampler(unsigned target)
+{	/* Returns true only when target equals one of the seven SHADOW* targets below. */
+	return target == TGSI_TEXTURE_SHADOW1D ||
+	       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
+	       target == TGSI_TEXTURE_SHADOW2D ||
+	       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+	       target == TGSI_TEXTURE_SHADOWCUBE ||
+	       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
+	       target == TGSI_TEXTURE_SHADOWRECT;
+}
+
 static void tex_fetch_args(
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
@@ -1592,6 +1603,31 @@ static void tex_fetch_args(
 	if (opcode == TGSI_OPCODE_TXP)
 		coords[3] = bld_base->base.one;
 
+	/* Pack offsets. */
+	if (opcode == TGSI_OPCODE_TG4 &&
+	    inst->Texture.NumOffsets) {
+		/* The offsets are six-bit signed integers packed like this:
+		 *   X=[5:0], Y=[13:8], and Z=[21:16].
+		 */
+		LLVMValueRef offset[3], pack;
+
+		assert(inst->Texture.NumOffsets == 1);
+
+		for (chan = 0; chan < 3; chan++) {
+			offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
+								     emit_data->inst, 0, chan);
+			offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
+						    lp_build_const_int32(gallivm, 0x3f), "");
+			if (chan)
+				offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
+							    lp_build_const_int32(gallivm, chan*8), "");
+		}
+
+		pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
+		pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
+		address[count++] = pack;
+	}
+
 	/* Pack LOD bias value */
 	if (opcode == TGSI_OPCODE_TXB)
 		address[count++] = coords[3];
@@ -1779,20 +1815,66 @@ static void tex_fetch_args(
 			}
 		}
 
+		emit_data->args[2] = lp_build_const_int32(gallivm, target);
+		emit_data->arg_count = 3;
+
 		emit_data->dst_type = LLVMVectorType(
 			LLVMInt32TypeInContext(bld_base->base.gallivm->context),
 			4);
+	} else if (opcode == TGSI_OPCODE_TG4) {
+		unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
+				    target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
+				    target == TGSI_TEXTURE_2D_ARRAY ||
+				    target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+				    target == TGSI_TEXTURE_CUBE_ARRAY ||
+				    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
+		unsigned is_rect = target == TGSI_TEXTURE_RECT;
+		unsigned gather_comp = 0;
+
+		/* DMASK was repurposed for GATHER4. 4 components are always
+		 * returned and DMASK works like a swizzle - it selects
+		 * the component to fetch. The only valid DMASK values are
+		 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+		 * (red,red,red,red) etc.) The ISA document doesn't mention
+		 * this.
+		 */
+
+		/* Get the component index from src1.x for Gather4. */
+		if (!tgsi_is_shadow_sampler(target)) {
+			LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+			LLVMValueRef comp_imm;
+			struct tgsi_src_register src1 = inst->Src[1].Register;
+
+			assert(src1.File == TGSI_FILE_IMMEDIATE);
+
+			comp_imm = imms[src1.Index][src1.SwizzleX];
+			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
+			gather_comp = CLAMP(gather_comp, 0, 3);
+		}
 
-		emit_data->arg_count = 3;
-	} else {
-		/* Sampler */
 		emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+		emit_data->args[3] = lp_build_const_int32(gallivm, 1 << gather_comp); /* dmask */
+		emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
+		emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
+		emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
+		emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
+		emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
+		emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
+		emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
+
+		emit_data->arg_count = 11;
 
 		emit_data->dst_type = LLVMVectorType(
 			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
 			4);
-
+	} else {
+		emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+		emit_data->args[3] = lp_build_const_int32(gallivm, target);
 		emit_data->arg_count = 4;
+
+		emit_data->dst_type = LLVMVectorType(
+			LLVMFloatTypeInContext(gallivm->context),
+			4);
 	}
 
 	/* The fetch opcode has been converted to a 2D array fetch.
@@ -1802,10 +1884,6 @@ static void tex_fetch_args(
 	else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
 		target = TGSI_TEXTURE_SHADOW2D_ARRAY;
 
-	/* Dimensions */
-	emit_data->args[emit_data->arg_count - 1] =
-		lp_build_const_int32(bld_base->base.gallivm, target);
-
 	/* Pad to power of two vector */
 	while (count < util_next_power_of_two(count))
 		address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
@@ -1838,6 +1916,28 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 }
 
+/* Build the suffixed intrinsic name and emit the call into emit_data->output. */
+static void build_new_tex_intrinsic(const struct lp_build_tgsi_action * action,
+				    struct lp_build_tgsi_context * bld_base,
+				    struct lp_build_emit_data * emit_data)
+{
+	char intr_name[127];
+	bool is_shadow = tgsi_is_shadow_sampler(emit_data->inst->Texture.Texture);
+
+	/* Name = base + ".c" (shadow target) + ".o" (offsets present) + ".v<N>i32",
+	 * N being the size of the args[0] vector; snprintf keeps it within intr_name. */
+	snprintf(intr_name, sizeof(intr_name), "%s%s%s.v%ui32",
+		 action->intr_name,
+		 is_shadow ? ".c" : "",
+		 emit_data->inst->Texture.NumOffsets ? ".o" : "",
+		 LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+
+	emit_data->output[emit_data->chan] = build_intrinsic(
+		bld_base->base.gallivm->builder, intr_name, emit_data->dst_type,
+		emit_data->args, emit_data->arg_count,
+		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+}
+
 static void txq_fetch_args(
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
@@ -2132,6 +2232,12 @@ static const struct lp_build_tgsi_action txq_action = {
 	.intr_name = "llvm.SI.resinfo"
 };
 
+static const struct lp_build_tgsi_action new_tex_action = {	/* installed for TGSI_OPCODE_TG4 */
+	.fetch_args = tex_fetch_args,		/* same argument setup the TXP/TXB paths go through */
+	.emit = build_new_tex_intrinsic,	/* appends ".c"/".o" and the vector-type suffix */
+	.intr_name = "llvm.SI.gather4"		/* base name before suffixes are added */
+};
+
 static void create_meta_data(struct si_shader_context *si_shader_ctx)
 {
 	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -2596,6 +2702,7 @@ int si_pipe_shader_create(
 	bld_base->op_actions[TGSI_OPCODE_TXL2] = txl_action;
 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
+	bld_base->op_actions[TGSI_OPCODE_TG4] = new_tex_action;
 
 #if HAVE_LLVM >= 0x0304
 	bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
-- 
1.9.1



More information about the mesa-dev mailing list