[Mesa-dev] [PATCH 10/11] freedreno/ir3: add support for ucp

Rob Clark robdclark at gmail.com
Sun Sep 13 08:51:57 PDT 2015


From: Rob Clark <robclark at freedesktop.org>

Use the nir_lower_clip pass to add the VS/FS instructions that handle
user-clip-planes and CLIPDIST.  Wire up support for the
load_user_clip_plane intrinsic to fetch ucp[plane] values as
driver-params (passed as consts to the shader).

Signed-off-by: Rob Clark <robclark at freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_cmdline.c    |  8 ++++++
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 32 ++++++++++++++++------
 src/gallium/drivers/freedreno/ir3/ir3_shader.c     | 27 ++++++++++++++----
 src/gallium/drivers/freedreno/ir3/ir3_shader.h     | 26 ++++++++++++++++++
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index ede29f4..cbf748a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -94,6 +94,7 @@ static void print_usage(void)
 	printf("    --saturate-t MASK - bitmask of samplers to saturate T coord\n");
 	printf("    --saturate-r MASK - bitmask of samplers to saturate R coord\n");
 	printf("    --stream-out      - enable stream-out (aka transform feedback)\n");
+	printf("    --ucp MASK        - bitmask of enabled user-clip-planes\n");
 	printf("    --help            - show this message\n");
 }
 
@@ -190,6 +191,13 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+		if (!strcmp(argv[n], "--ucp")) {
+			debug_printf(" %s %s", argv[n], argv[n+1]);
+			key.ucp_enables = strtol(argv[n+1], NULL, 0);
+			n += 2;
+			continue;
+		}
+
 		if (!strcmp(argv[n], "--help")) {
 			print_usage();
 			return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 29846c4..aae32a7 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -127,7 +127,8 @@ struct ir3_compile {
 static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
 static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
 
-static struct nir_shader *to_nir(const struct tgsi_token *tokens)
+static struct nir_shader *to_nir(const struct tgsi_token *tokens,
+		struct ir3_shader_variant *so)
 {
 	struct nir_shader_compiler_options options = {
 			.lower_fpow = true,
@@ -148,6 +149,11 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
 
 	nir_opt_global_to_local(s);
 	nir_convert_to_ssa(s);
+	if (s->stage == MESA_SHADER_VERTEX) {
+		nir_lower_clip_vs(s, so->key.ucp_enables);
+	} else if (s->stage == MESA_SHADER_FRAGMENT) {
+		nir_lower_clip_fs(s, so->key.ucp_enables);
+	}
 	nir_lower_idiv(s);
 	nir_lower_load_const_to_scalar(s);
 
@@ -251,7 +257,7 @@ compile_init(struct ir3_compiler *compiler,
 	lowered_tokens = lower_tgsi(ctx, tokens, so);
 	if (!lowered_tokens)
 		lowered_tokens = tokens;
-	ctx->s = to_nir(lowered_tokens);
+	ctx->s = to_nir(lowered_tokens, so);
 
 	if (lowered_tokens != tokens)
 		free((void *)lowered_tokens);
@@ -263,7 +269,7 @@ compile_init(struct ir3_compiler *compiler,
 	 *    num_uniform * vec4  -  user consts
 	 *    4 * vec4            -  UBO addresses
 	 *    if (vertex shader) {
-	 *        1 * vec4        -  driver params (IR3_DP_*)
+	 *        N * vec4        -  driver params (IR3_DP_*)
 	 *        1 * vec4        -  stream-out addresses
 	 *    }
 	 *
@@ -275,8 +281,8 @@ compile_init(struct ir3_compiler *compiler,
 	so->first_immediate += 4;
 
 	if (so->type == SHADER_VERTEX) {
-		/* one (vec4) slot for driver params (see ir3_driver_param): */
-		so->first_immediate++;
+		/* driver params (see ir3_driver_param): */
+		so->first_immediate += IR3_DP_COUNT/4;  /* convert to vec4 */
 		/* one (vec4) slot for stream-output base addresses: */
 		so->first_immediate++;
 	}
@@ -828,7 +834,9 @@ static struct ir3_instruction *
 create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
 {
 	/* first four vec4 sysval's reserved for UBOs: */
-	unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+	/* NOTE: dp is in scalar, but there can be >4 dp components: */
+	unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+	unsigned r = regid(n + dp / 4, dp % 4);
 	return create_uniform(ctx, r);
 }
 
@@ -1199,7 +1207,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
 	struct ir3_block *b = ctx->block;
 	struct ir3_instruction *addr, *src0, *src1;
 	/* UBO addresses are the first driver params: */
-	unsigned ubo = regid(ctx->so->first_driver_param, 0);
+	unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
 	unsigned off = intr->const_index[0];
 
 	/* First src is ubo index, which could either be an immed or not: */
@@ -1459,6 +1467,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		}
 		dst[0] = ctx->instance_id;
 		break;
+	case nir_intrinsic_load_user_clip_plane:
+		for (int i = 0; i < intr->num_components; i++) {
+			unsigned n = idx * 4 + i;
+			dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
+		}
+		break;
 	case nir_intrinsic_discard_if:
 	case nir_intrinsic_discard: {
 		struct ir3_instruction *cond, *kill;
@@ -2066,7 +2080,7 @@ emit_stream_out(struct ir3_compile *ctx)
 		unsigned stride = strmout->stride[i];
 		struct ir3_instruction *base, *off;
 
-		base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+		base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
 
 		/* 24-bit should be enough: */
 		off = ir3_MUL_U(ctx->block, vtxcnt, 0,
@@ -2250,6 +2264,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 		case VARYING_SLOT_BFC0:
 		case VARYING_SLOT_BFC1:
 		case VARYING_SLOT_FOGC:
+		case VARYING_SLOT_CLIP_DIST0:
+		case VARYING_SLOT_CLIP_DIST1:
 			break;
 		default:
 			if (slot >= VARYING_SLOT_VAR0)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 7b25050..7b56533 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -501,7 +501,7 @@ static void
 emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 		struct fd_constbuf_stateobj *constbuf)
 {
-	uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+	uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
 	if (v->constlen > offset) {
 		struct fd_context *ctx = fd_context(v->shader->pctx);
 		uint32_t params = MIN2(4, v->constlen - offset) * 4;
@@ -554,7 +554,8 @@ emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 static void
 emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 {
-	uint32_t offset = v->first_driver_param + 5;  /* streamout addresses after driver-params*/
+	/* streamout addresses after driver-params: */
+	uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
 	if (v->constlen > offset) {
 		struct fd_context *ctx = fd_context(v->shader->pctx);
 		struct fd_streamout_stateobj *so = &ctx->streamout;
@@ -657,17 +658,33 @@ ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 	/* emit driver params every time: */
 	/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
 	if (info && (v->type == SHADER_VERTEX)) {
-		uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+		uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
 		if (v->constlen >= offset) {
-			uint32_t vertex_params[4] = {
+			uint32_t vertex_params[IR3_DP_COUNT] = {
 				[IR3_DP_VTXID_BASE] = info->indexed ?
 						info->index_bias : info->start,
 				[IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
 			};
+			/* if no user-clip-planes, we don't need to emit the
+			 * entire thing:
+			 */
+			uint32_t vertex_params_size = 4;
+
+			if (v->key.ucp_enables) {
+				struct pipe_clip_state *ucp = &ctx->ucp;
+				unsigned pos = IR3_DP_UCP0_X;
+				for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+					for (unsigned j = 0; j < 4; j++) {
+						vertex_params[pos] = fui(ucp->ucp[i][j]);
+						pos++;
+					}
+				}
+				vertex_params_size = ARRAY_SIZE(vertex_params);
+			}
 
 			fd_wfi(ctx, ring);
 			ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
-					ARRAY_SIZE(vertex_params), vertex_params, NULL);
+					vertex_params_size, vertex_params, NULL);
 
 			/* if needed, emit stream-out buffer addresses: */
 			if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 39b8864..6dc0ce1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -39,8 +39,29 @@
 enum ir3_driver_param {
 	IR3_DP_VTXID_BASE = 0,
 	IR3_DP_VTXCNT_MAX = 1,
+	/* user-clip-plane components, up to 8x vec4's: */
+	IR3_DP_UCP0_X     = 4,
+	/* .... */
+	IR3_DP_UCP7_W     = 35,
+	IR3_DP_COUNT      = 36   /* must be aligned to vec4 */
 };
 
+/* Layout of constant registers:
+ *
+ *    num_uniform * vec4  -  user consts
+ *    4 * vec4            -  UBO addresses
+ *    if (vertex shader) {
+ *        N * vec4        -  driver params (IR3_DP_*)
+ *        1 * vec4        -  stream-out addresses
+ *    }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
+ */
+#define IR3_UBOS_OFF         0  /* UBOs after user consts */
+#define IR3_DRIVER_PARAM_OFF 4  /* driver params after UBOs */
+#define IR3_TFBOS_OFF       (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
+
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
@@ -48,6 +69,11 @@ enum ir3_driver_param {
 struct ir3_shader_key {
 	union {
 		struct {
+			/*
+			 * Combined Vertex/Fragment shader parameters:
+			 */
+			unsigned ucp_enables : 8;
+
 			/* do we need to check {v,f}saturate_{s,t,r}? */
 			unsigned has_per_samp : 1;
 
-- 
2.4.3



More information about the mesa-dev mailing list