Mesa (master): freedreno/a3xx: add support for vertexid and instanceid sysvals

Ilia Mirkin imirkin at kemper.freedesktop.org
Thu Feb 19 05:28:07 UTC 2015


Module: Mesa
Branch: master
Commit: f6b2e8af7425c67f8def9dfba92f6f0ad9585b40
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f6b2e8af7425c67f8def9dfba92f6f0ad9585b40

Author: Ilia Mirkin <imirkin at alum.mit.edu>
Date:   Wed Oct  1 23:13:22 2014 -0400

freedreno/a3xx: add support for vertexid and instanceid sysvals

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>

---

 docs/relnotes/10.6.0.html                        |    1 +
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c    |   46 ++++++++++---
 src/gallium/drivers/freedreno/freedreno_screen.c |    8 ++-
 src/gallium/drivers/freedreno/ir3/ir3_compiler.c |   80 +++++++++++++++++++++-
 src/gallium/drivers/freedreno/ir3/ir3_shader.h   |    1 +
 5 files changed, 120 insertions(+), 16 deletions(-)

diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html
index d201a65..bedbd4b 100644
--- a/docs/relnotes/10.6.0.html
+++ b/docs/relnotes/10.6.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>GL_AMD_pinned_memory on r600, radeonsi</li>
+<li>GL_ARB_draw_instanced on freedreno</li>
 <li>GL_ARB_pipeline_statistics_query on i965, nvc0, r600, radeonsi, softpipe</li>
 </ul>
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 1c17e2d..ad5fcb3 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -351,21 +351,31 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
 void
 fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 {
-	uint32_t i, j, last = 0;
+	int32_t i, j, last = -1;
 	uint32_t total_in = 0;
 	const struct fd_vertex_state *vtx = emit->vtx;
 	struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
-	unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
+	unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+
+	for (i = 0; i < vp->inputs_count; i++) {
+		uint8_t semantic = sem2name(vp->inputs[i].semantic);
+		if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
+			vertex_regid = vp->inputs[i].regid;
+		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
+			instance_regid = vp->inputs[i].regid;
+		else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+			last = i;
+	}
 
 	/* hw doesn't like to be configured for zero vbo's, it seems: */
-	if (vtx->vtx->num_elements == 0)
+	if (vtx->vtx->num_elements == 0 &&
+		vertex_regid == regid(63, 0) &&
+		instance_regid == regid(63, 0))
 		return;
 
-	for (i = 0; i < n; i++)
-		if (vp->inputs[i].compmask)
-			last = i;
-
 	for (i = 0, j = 0; i <= last; i++) {
+		uint8_t semantic = sem2name(vp->inputs[i].semantic);
+		assert(semantic == 0);
 		if (vp->inputs[i].compmask) {
 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
 			const struct pipe_vertex_buffer *vb =
@@ -373,7 +383,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 			struct fd_resource *rsc = fd_resource(vb->buffer);
 			enum pipe_format pfmt = elem->src_format;
 			enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
-			bool switchnext = (i != last);
+			bool switchnext = (i != last) ||
+				vertex_regid != regid(63, 0) ||
+				instance_regid != regid(63, 0);
 			bool isint = util_format_is_pure_integer(pfmt);
 			uint32_t fs = util_format_get_blocksize(pfmt);
 
@@ -409,8 +421,8 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 			A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
 			A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
 	OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
-			A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
-			A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+			A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+			A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
 }
 
 void
@@ -580,6 +592,20 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		}
 	}
 
+	/* emit driver params every time */
+	if (emit->info && emit->prog == &ctx->prog) {
+		uint32_t vertex_params[4] = {
+			emit->info->indexed ? emit->info->index_bias : emit->info->start,
+			0,
+			0,
+			0
+		};
+		if (vp->constlen > vp->first_driver_param) {
+			fd3_emit_constant(ring, SB_VERT_SHADER, vp->first_driver_param * 4,
+							  0, 4, vertex_params, NULL);
+		}
+	}
+
 	if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
 		struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
 		uint32_t i;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 1ce96d3..7952c04 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -160,7 +160,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_SHADOW_MAP:
 	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
 	case PIPE_CAP_TEXTURE_SWIZZLE:
-	case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
 	case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -172,6 +171,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
 	case PIPE_CAP_USER_CONSTANT_BUFFERS:
 	case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+	case PIPE_CAP_VERTEXID_NOBASE:
 		return 1;
 
 	case PIPE_CAP_SHADER_STENCIL_EXPORT:
@@ -186,7 +186,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-	case PIPE_CAP_TGSI_INSTANCEID:
+	case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_COMPUTE:
 		return 0;
@@ -195,6 +195,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_PRIMITIVE_RESTART:
 		return is_a3xx(screen) || is_a4xx(screen);
 
+	case PIPE_CAP_TGSI_INSTANCEID:
+		return is_a3xx(screen) && glsl130;
+
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
 		return 256;
 
@@ -228,7 +231,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_CLIP_HALFZ:
-	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_POLYGON_OFFSET_CLAMP:
 	case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
 		return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 8c88bf7..3ee9642 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -83,6 +83,9 @@ struct ir3_compile_context {
 	 */
 	struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
 
+	/* For vertex shaders, keep track of the system values sources */
+	struct ir3_instruction *vertex_id, *basevertex, *instance_id;
+
 	struct tgsi_parse_context parser;
 	unsigned type;
 
@@ -105,6 +108,9 @@ struct ir3_compile_context {
 	unsigned num_internal_temps;
 	struct tgsi_src_register internal_temps[8];
 
+	/* for looking up which system value is which */
+	unsigned sysval_semantics[8];
+
 	/* idx/slot for last compiler generated immediate */
 	unsigned immediate_idx;
 
@@ -222,6 +228,8 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	ctx->atomic = false;
 	ctx->frag_pos = NULL;
 	ctx->frag_face = NULL;
+	ctx->vertex_id = NULL;
+	ctx->instance_id = NULL;
 	ctx->tmp_src = NULL;
 	ctx->using_tmp_dst = false;
 
@@ -239,7 +247,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	 * the assembler what the max addr reg value can be:
 	 */
 	if (info->indirect_files & FM(CONSTANT))
-		so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1);
+		so->constlen = ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
 
 	i = 0;
 	i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
@@ -248,7 +256,12 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	/* any others? we don't track arrays for const..*/
 
 	/* Immediates go after constants: */
-	so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
+	if (so->type == SHADER_VERTEX) {
+		so->first_driver_param = info->file_max[TGSI_FILE_CONSTANT] + 1;
+		so->first_immediate = so->first_driver_param + 1;
+	} else {
+		so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
+	}
 	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
 
 	ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
@@ -355,7 +368,7 @@ push_block(struct ir3_compile_context *ctx)
 	ntmp += 8 * 4;
 
 	nout = SCALAR_REGS(OUTPUT);
-	nin  = SCALAR_REGS(INPUT);
+	nin  = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE);
 
 	/* for outermost block, 'inputs' are the actual shader INPUT
 	 * register file.  Reads from INPUT registers always go back to
@@ -555,6 +568,19 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, unsigned n)
 			block->temporaries[n] = instr;
 		}
 		break;
+	case TGSI_FILE_SYSTEM_VALUE:
+		switch (ctx->sysval_semantics[n >> 2]) {
+		case TGSI_SEMANTIC_VERTEXID_NOBASE:
+			instr = ctx->vertex_id;
+			break;
+		case TGSI_SEMANTIC_BASEVERTEX:
+			instr = ctx->basevertex;
+			break;
+		case TGSI_SEMANTIC_INSTANCEID:
+			instr = ctx->instance_id;
+			break;
+		}
+		break;
 	}
 
 	return instr;
@@ -735,6 +761,7 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
 		 */
 	case TGSI_FILE_INPUT:
 	case TGSI_FILE_TEMPORARY:
+	case TGSI_FILE_SYSTEM_VALUE:
 		/* uses SSA */
 		break;
 	default:
@@ -2935,6 +2962,51 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
 }
 
 static void
+decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+	struct ir3_shader_variant *so = ctx->so;
+	unsigned r = regid(so->inputs_count, 0);
+	unsigned n = so->inputs_count++;
+
+	DBG("decl sv -> r%d", n);
+
+	compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
+	compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics));
+
+	ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name;
+	so->inputs[n].semantic = decl_semantic(&decl->Semantic);
+	so->inputs[n].compmask = 1;
+	so->inputs[n].regid = r;
+	so->inputs[n].inloc = ctx->next_inloc;
+	so->inputs[n].interpolate = false;
+
+	struct ir3_instruction *instr = NULL;
+
+	switch (decl->Semantic.Name) {
+	case TGSI_SEMANTIC_VERTEXID_NOBASE:
+		ctx->vertex_id = instr = create_input(ctx->block, NULL, r);
+		break;
+	case TGSI_SEMANTIC_BASEVERTEX:
+		ctx->basevertex = instr = instr_create(ctx, 1, 0);
+		instr->cat1.src_type = get_stype(ctx);
+		instr->cat1.dst_type = get_stype(ctx);
+		ir3_reg_create(instr, 0, 0);
+		ir3_reg_create(instr, regid(so->first_driver_param, 0), IR3_REG_CONST);
+		break;
+	case TGSI_SEMANTIC_INSTANCEID:
+		ctx->instance_id = instr = create_input(ctx->block, NULL, r);
+		break;
+	default:
+		compile_error(ctx, "Unknown semantic: %s\n",
+					  tgsi_semantic_names[decl->Semantic.Name]);
+	}
+
+	ctx->block->inputs[r] = instr;
+	ctx->next_inloc++;
+	so->total_in++;
+}
+
+static void
 decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
 {
 	struct ir3_shader_variant *so = ctx->so;
@@ -3099,6 +3171,8 @@ compile_instructions(struct ir3_compile_context *ctx)
 				decl_out(ctx, decl);
 			} else if (file == TGSI_FILE_INPUT) {
 				decl_in(ctx, decl);
+			} else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+				decl_sv(ctx, decl);
 			}
 
 			if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 5207185..e5d57af 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -182,6 +182,7 @@ struct ir3_shader_variant {
 	 * (not regid, because TGSI thinks in terms of vec4 registers,
 	 * not scalar registers)
 	 */
+	unsigned first_driver_param;
 	unsigned first_immediate;
 	unsigned immediates_count;
 	struct {




More information about the mesa-commit mailing list