[Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support

Dave Airlie airlied at gmail.com
Sun Dec 16 02:33:23 PST 2012


From: Dave Airlie <airlied at redhat.com>

This adds TBO support to r600g, and with GLSL 1.40 enabled,
we now get 3.1 core profiles advertised for r600g.

This code is evergreen only so far, but I don't think much is
needed to make it work on r600/700/cayman other than testing.

a) buffer txq is broken in the same way as cube map array txq, so work
around it the exact same way: pass the sizes in a driver constant buffer.

b) buffer fetches are done with a vertex clause.

c) vertex swizzling offsets are different than texture swizzles,
but we still need to use the combiner, so make it configurable.

d) add implementation of UCMP.

TODO: r600/700/cayman testing
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c   | 55 ++++++++++++++++++++
 src/gallium/drivers/r600/r600_asm.c          |  2 +-
 src/gallium/drivers/r600/r600_asm.h          |  2 +
 src/gallium/drivers/r600/r600_pipe.c         |  4 +-
 src/gallium/drivers/r600/r600_pipe.h         | 10 +++-
 src/gallium/drivers/r600/r600_shader.c       | 75 ++++++++++++++++++++++++++++
 src/gallium/drivers/r600/r600_shader.h       |  1 +
 src/gallium/drivers/r600/r600_state_common.c | 58 +++++++++++++++++----
 src/gallium/drivers/r600/r600_texture.c      | 16 ++++--
 9 files changed, 204 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 996c1b4..49564e7 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -969,6 +969,58 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
 	return ss;
 }
 
+static struct pipe_sampler_view *
+texture_buffer_sampler_view(struct r600_pipe_sampler_view *view,
+			    unsigned width0, unsigned height0)
+			    
+{
+	struct pipe_context *ctx = view->base.context;
+	struct r600_texture *tmp = (struct r600_texture*)view->base.texture;
+	uint64_t va;
+	int stride = util_format_get_blocksize(view->base.format);
+	unsigned format, num_format, format_comp, endian;
+	unsigned swizzle_res;
+	unsigned char swizzle[4];
+	const struct util_format_description *desc;
+
+	swizzle[0] = view->base.swizzle_r;
+	swizzle[1] = view->base.swizzle_g;
+	swizzle[2] = view->base.swizzle_b;
+	swizzle[3] = view->base.swizzle_a;
+
+	r600_vertex_data_type(view->base.format,
+			      &format, &num_format, &format_comp,
+			      &endian);
+
+	desc = util_format_description(view->base.format);
+
+	swizzle_res = r600_get_swizzle_combined(desc->swizzle, swizzle, TRUE);
+
+	va = r600_resource_va(ctx->screen, view->base.texture);
+	view->tex_resource = &tmp->resource;
+
+	view->skip_mip_address_reloc = true;
+	view->tex_resource_words[0] = va;
+	view->tex_resource_words[1] = width0 - 1;
+	view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) |
+		S_030008_STRIDE(stride) |
+		S_030008_DATA_FORMAT(format) |
+		S_030008_NUM_FORMAT_ALL(num_format) |
+		S_030008_FORMAT_COMP_ALL(format_comp) |
+		S_030008_SRF_MODE_ALL(1) |
+		S_030008_ENDIAN_SWAP(endian);
+	view->tex_resource_words[3] = swizzle_res;
+	/*
+	 * in theory dword 4 is for number of elements, for use with resinfo,
+	 * but it seems to utterly fail to work, the amd gpu shader analyser
+	 * uses a const buffer to store the element sizes for buffer txq
+	 */
+	view->tex_resource_words[4] = 0;
+	view->tex_resource_words[5] = view->tex_resource_words[6] = 0;
+	view->tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
+	return &view->base;
+}
+
 struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 				     struct pipe_resource *texture,
@@ -997,6 +1049,9 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	view->base.reference.count = 1;
 	view->base.context = ctx;
 
+	if (texture->target == PIPE_BUFFER)
+		return texture_buffer_sampler_view(view, width0, height0);
+
 	swizzle[0] = state->swizzle_r;
 	swizzle[1] = state->swizzle_g;
 	swizzle[2] = state->swizzle_b;
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 268137f..4bb22bd 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2607,7 +2607,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
 	fprintf(stderr, "--------------------------------------\n");
 }
 
-static void r600_vertex_data_type(enum pipe_format pformat,
+void r600_vertex_data_type(enum pipe_format pformat,
 				  unsigned *format,
 				  unsigned *num_format, unsigned *format_comp, unsigned *endian)
 {
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 5727a7c..182f403 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -250,4 +250,6 @@ void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
 void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 
+void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
 #endif
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 19147d9..0b94bd3 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -424,7 +424,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 256;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
-		return 130;
+		return family >= CHIP_CEDAR ? 140 : 130;
 
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 		return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
@@ -438,6 +438,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	/* Supported on Evergreen. */
 	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 	case PIPE_CAP_CUBE_MAP_ARRAY:
+	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
 		return family >= CHIP_CEDAR ? 1 : 0;
 
 	/* Unsupported features. */
@@ -449,7 +450,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
-	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
 		return 0;
 
 	/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 50f181d..8b25277 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,12 +38,13 @@
 #define R600_NUM_ATOMS 36
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_BUFFER_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 
 #define R600_MAX_CONST_BUFFER_SIZE 4096
 
@@ -316,6 +317,7 @@ struct r600_samplerview_state {
 	uint32_t			compressed_depthtex_mask; /* which textures are depth */
 	uint32_t			compressed_colortex_mask;
 	boolean                         dirty_txq_constants;
+	boolean				dirty_buffer_txq_constants;
 };
 
 struct r600_sampler_states {
@@ -333,6 +335,8 @@ struct r600_textures_info {
 
 	/* cube array txq workaround */
 	uint32_t			*txq_constants;
+	/* buffer txq workaround */
+	uint32_t			*buffer_txq_constants;
 };
 
 struct r600_fence {
@@ -663,6 +667,10 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
 						const struct pipe_surface *templ,
 						unsigned width, unsigned height);
 
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+				   const unsigned char *swizzle_view,
+				   boolean vtx);
+
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index feb7001..60667e7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
 	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
 }
 
+static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
+{
+	struct r600_bytecode_vtx vtx;
+	struct r600_bytecode_alu alu;
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int src_gpr, r, i;
+
+	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
+	if (src_requires_loading) {
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+			alu.dst.sel = ctx->temp_reg;
+			alu.dst.chan = i;
+			if (i == 3)
+				alu.last = 1;
+			alu.dst.write = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+		src_gpr = ctx->temp_reg;
+	}
+
+	memset(&vtx, 0, sizeof(vtx));
+	vtx.inst = 0;
+	vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;;
+	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
+	vtx.src_gpr = src_gpr;
+	vtx.mega_fetch_count = 16;
+	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
+	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
+	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
+	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
+	vtx.use_const_fields = 1;
+	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
+
+	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
+		return r;
+	return 0;
+}
+
+static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int r;
+	int id = tgsi_tex_get_src_gpr(ctx, 1);
+
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+
+	alu.src[0].sel = 512 + (id / 4);
+	alu.src[0].kc_bank = R600_BUFFER_TXQ_CONST_BUFFER;
+	alu.src[0].chan = id % 4;
+	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+	alu.last = 1;
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
+	if (r)
+		return r;
+	return 0;
+}
+
 static int tgsi_tex(struct r600_shader_ctx *ctx)
 {
 	static float one_point_five = 1.5f;
@@ -3857,6 +3922,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
 
+	if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
+		if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
+			ctx->shader->has_txq_buffer = true;
+			return r600_do_buffer_txq(ctx);
+		}
+		else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF)
+			return do_vtx_fetch_inst(ctx, src_requires_loading);
+		return -1; /* can only TXF/TXQ from buffers */
+	}
+
 	if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
 		/* get offset values */
 		if (inst->Texture.NumOffsets) {
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index b58a58a..88f71ad 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -61,6 +61,7 @@ struct r600_shader {
 	boolean			vs_out_misc_write;
 	boolean			vs_out_point_size;
 	boolean			has_txq_cube_array_z_comp;
+	boolean			has_txq_buffer;
 };
 
 struct r600_shader_key {
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index b20f655..9e51f7d 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -591,19 +591,20 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 			struct r600_texture *rtex =
 				(struct r600_texture*)rviews[i]->base.texture;
 
-			if (rtex->is_depth && !rtex->is_flushing_texture) {
-				dst->views.compressed_depthtex_mask |= 1 << i;
-			} else {
-				dst->views.compressed_depthtex_mask &= ~(1 << i);
-			}
+			if (rviews[i]->base.texture->target != PIPE_BUFFER) {
+				if (rtex->is_depth && !rtex->is_flushing_texture) {
+					dst->views.compressed_depthtex_mask |= 1 << i;
+				} else {
+					dst->views.compressed_depthtex_mask &= ~(1 << i);
+				}
 
-			/* Track compressed colorbuffers. */
-			if (rtex->cmask_size && rtex->fmask_size) {
-				dst->views.compressed_colortex_mask |= 1 << i;
-			} else {
-				dst->views.compressed_colortex_mask &= ~(1 << i);
+				/* Track compressed colorbuffers. */
+				if (rtex->cmask_size && rtex->fmask_size) {
+					dst->views.compressed_colortex_mask |= 1 << i;
+				} else {
+					dst->views.compressed_colortex_mask &= ~(1 << i);
+				}
 			}
-
 			/* Changing from array to non-arrays textures and vice versa requires
 			 * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */
 			if (rctx->chip_class <= R700 &&
@@ -628,6 +629,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 	dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
 	dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
 	dst->views.dirty_txq_constants = TRUE;
+	dst->views.dirty_buffer_txq_constants = TRUE;
 	r600_sampler_views_dirty(rctx, &dst->views);
 
 	if (dirty_sampler_states_mask) {
@@ -1026,6 +1028,35 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
 	rctx->sample_mask.atom.dirty = true;
 }
 
+static void r600_setup_txq_buffer_constants(struct r600_context *rctx, int shader_type)
+{
+	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
+	int bits;
+	uint32_t array_size;
+	struct pipe_constant_buffer cb;
+	int i;
+
+	if (!samplers->views.dirty_buffer_txq_constants)
+		return;
+
+	samplers->views.dirty_buffer_txq_constants = FALSE;
+
+	bits = util_last_bit(samplers->views.enabled_mask);
+	array_size = bits * sizeof(uint32_t) * 4;
+	samplers->buffer_txq_constants = realloc(samplers->buffer_txq_constants, array_size);
+	memset(samplers->buffer_txq_constants, 0, array_size);
+	for (i = 0; i < bits; i++)
+		if (samplers->views.enabled_mask & (1 << i))
+		   samplers->buffer_txq_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+
+	cb.buffer = NULL;
+	cb.user_buffer = samplers->buffer_txq_constants;
+	cb.buffer_offset = 0;
+	cb.buffer_size = array_size;
+	rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_TXQ_CONST_BUFFER, &cb);
+	pipe_resource_reference(&cb.buffer, NULL);
+}
+
 static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type)
 {
 	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
@@ -1098,6 +1129,11 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 	if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)
 		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX);
 
+	if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_buffer)
+		r600_setup_txq_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+	if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_buffer)
+		r600_setup_txq_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+
 	if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
 		if (!r600_adjust_gprs(rctx)) {
 			/* discard rendering */
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 56e9b64..5736190 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -862,18 +862,26 @@ void r600_init_surface_functions(struct r600_context *r600)
 	r600->context.surface_destroy = r600_surface_destroy;
 }
 
-static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
-		const unsigned char *swizzle_view)
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+				   const unsigned char *swizzle_view,
+				   boolean vtx)
 {
 	unsigned i;
 	unsigned char swizzle[4];
 	unsigned result = 0;
-	const uint32_t swizzle_shift[4] = {
+	const uint32_t tex_swizzle_shift[4] = {
 		16, 19, 22, 25,
 	};
+	const uint32_t vtx_swizzle_shift[4] = {
+		3, 6, 9, 12,
+	};
 	const uint32_t swizzle_bit[4] = {
 		0, 1, 2, 3,
 	};
+	const uint32_t *swizzle_shift = tex_swizzle_shift;
+
+	if (vtx)
+		swizzle_shift = vtx_swizzle_shift;
 
 	if (swizzle_view) {
 		util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
@@ -927,7 +935,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
 	};
 	desc = util_format_description(format);
 
-	word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+	word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
 
 	/* Colorspace (return non-RGB formats directly). */
 	switch (desc->colorspace) {
-- 
1.8.0



More information about the mesa-dev mailing list