[Mesa-dev] [PATCH 1/2] r600g: Unify 3D and compute vertex buffer emission

Tom Stellard tstellar at gmail.com
Thu Jul 12 12:50:27 PDT 2012


---
 src/gallium/drivers/r600/evergreen_compute.c       |   47 +++++++++---
 .../drivers/r600/evergreen_compute_internal.c      |   81 --------------------
 .../drivers/r600/evergreen_compute_internal.h      |    1 -
 src/gallium/drivers/r600/evergreen_state.c         |   30 +++++--
 src/gallium/drivers/r600/r600_pipe.h               |    5 +
 5 files changed, 64 insertions(+), 100 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index a88cad1..b61ea8f 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -83,6 +83,22 @@ writable images will consume TEX slots, VTX slots too because of linear indexing
 
 */
 
+static void evergreen_cs_set_vertex_buffer(
+	struct r600_context * rctx,
+	unsigned vb_index,
+	unsigned offset,
+	struct pipe_resource * buffer)
+{
+	struct pipe_vertex_buffer *vb = &rctx->cs_vertex_buffer[vb_index];
+	vb->stride = 1;
+	vb->buffer_offset = offset;
+	vb->buffer = buffer;
+	vb->user_buffer = NULL;
+
+	r600_inval_vertex_cache(rctx);
+	r600_atom_dirty(rctx, &rctx->cs_vertex_buffer_state);
+}
+
 const struct u_resource_vtbl r600_global_buffer_vtbl =
 {
 	u_default_resource_get_handle, /* get_handle */
@@ -263,8 +279,8 @@ void evergreen_compute_upload_input(
 	ctx->ws->buffer_unmap(ctx->cs_shader->kernel_param->cs_buf);
 
 	///ID=0 is reserved for the parameters
-	evergreen_set_vtx_resource(ctx->cs_shader,
-		ctx->cs_shader->kernel_param, 0, 0, 0);
+	evergreen_cs_set_vertex_buffer(ctx, 0, 0,
+			(struct pipe_resource*)ctx->cs_shader->kernel_param);
 	///ID=0 is reserved for parameters
 	evergreen_set_const_cache(ctx->cs_shader, 0,
 		ctx->cs_shader->kernel_param, ctx->cs_shader->input_size, 0);
@@ -350,6 +366,10 @@ static void compute_emit_cs(struct r600_context *ctx)
         cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
 	r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
 
+	/* Emit vertex buffer state */
+	ctx->cs_vertex_buffer_state.num_dw = 12 * ctx->nr_cs_vertex_buffers;
+	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state);
+
 	for (i = 0; i < get_compute_resource_num(); i++) {
 		if (ctx->cs_shader->resources[i].enabled) {
 			int j;
@@ -452,14 +472,15 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
 			start, count);
 
 	for (int i = 0; i < count; i++)	{
+		/* The first two vertex buffers are reserved: 0 for the kernel
+		 * parameters and 1 for the global memory pool. */
+		unsigned vtx_id = 2 + i;
 		if (resources[i]) {
 			struct r600_resource_global *buffer =
-				(struct r600_resource_global*)resources[i]->base.texture;
+				(struct r600_resource_global*)
+				resources[i]->base.texture;
 			if (resources[i]->base.writable) {
 				assert(i+1 < 12);
-				struct r600_resource_global *buffer =
-					(struct r600_resource_global*)
-					resources[i]->base.texture;
 
 				evergreen_set_rat(ctx->cs_shader, i+1,
 				(struct r600_resource *)resources[i]->base.texture,
@@ -467,9 +488,10 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
 				resources[i]->base.texture->width0);
 			}
 
-			evergreen_set_vtx_resource(ctx->cs_shader,
-				(struct r600_resource *)resources[i]->base.texture, i+2,
-				 buffer->chunk->start_in_dw*4, resources[i]->base.writable);
+			evergreen_cs_set_vertex_buffer(ctx, vtx_id,
+					buffer->chunk->start_in_dw * 4,
+					resources[i]->base.texture);
+			ctx->nr_cs_vertex_buffers = vtx_id + 1;
 		}
 	}
 
@@ -539,7 +561,8 @@ static void evergreen_set_global_binding(
 	}
 
 	evergreen_set_rat(ctx->cs_shader, 0, pool->bo, 0, pool->size_in_dw * 4);
-	evergreen_set_vtx_resource(ctx->cs_shader, pool->bo, 1, 0, 1);
+	evergreen_cs_set_vertex_buffer(ctx, 1, 0,
+				(struct pipe_resource*)pool->bo);
 }
 
 /**
@@ -712,6 +735,10 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx)
 	ctx->context.bind_compute_sampler_states = evergreen_bind_compute_sampler_states;
 	ctx->context.set_global_binding = evergreen_set_global_binding;
 	ctx->context.launch_grid = evergreen_launch_grid;
+
+	/* We always use at least two vertex buffers for compute, one for
+	 * parameters and one for global memory. */
+	ctx->nr_cs_vertex_buffers = 2;
 }
 
 
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index c5aad93..1d11bab 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -489,87 +489,6 @@ static unsigned r600_tex_dim(unsigned dim)
 	}
 }
 
-void evergreen_set_vtx_resource(
-	struct r600_pipe_compute *pipe,
-	struct r600_resource* bo,
-	int id, uint64_t offset, int writable)
-{
-	assert(id < 16);
-	uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
-	struct number_type_and_format fmt;
-	uint64_t va;
-
-	fmt.format = 0;
-
-	assert(bo->b.b.height0 <= 1);
-	assert(bo->b.b.depth0 <= 1);
-
-	int e = evergreen_compute_get_gpu_format(&fmt, bo);
-
-	assert(e && "unknown format");
-
-	struct evergreen_compute_resource* res =
-		get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
-
-	unsigned size = bo->b.b.width0;
-	unsigned stride = 1;
-
-//	size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
-//		util_format_get_blocksize(bo->b.b.b.format));
-
-	va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset;
-
-	COMPUTE_DBG("id: %i vtx size: %i byte,	width0: %i elem\n",
-		id, size, bo->b.b.width0);
-
-	sq_vtx_constant_word2 =
-		S_030008_BASE_ADDRESS_HI(va >> 32) |
-		S_030008_STRIDE(stride) |
-		S_030008_DATA_FORMAT(fmt.format) |
-		S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
-		S_030008_ENDIAN_SWAP(0);
-
-	COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset,
-			stride, fmt.format, fmt.num_format_all);
-
-	sq_vtx_constant_word3 =
-		S_03000C_DST_SEL_X(0) |
-		S_03000C_DST_SEL_Y(1) |
-		S_03000C_DST_SEL_Z(2) |
-		S_03000C_DST_SEL_W(3);
-
-	sq_vtx_constant_word4 = 0;
-
-	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
-	evergreen_emit_raw_value(res, (id+816)*32 >> 2);
-	evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff));
-	evergreen_emit_raw_value(res, size - 1);
-	evergreen_emit_raw_value(res, sq_vtx_constant_word2);
-	evergreen_emit_raw_value(res, sq_vtx_constant_word3);
-	evergreen_emit_raw_value(res, sq_vtx_constant_word4);
-	evergreen_emit_raw_value(res, 0);
-	evergreen_emit_raw_value(res, 0);
-	evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
-
-	res->bo = bo;
-
-	if (writable) {
-		res->usage = RADEON_USAGE_READWRITE;
-	}
-	else {
-		res->usage = RADEON_USAGE_READ;
-	}
-
-	res->coher_bo_size = size;
-
-	r600_inval_vertex_cache(pipe->ctx);
-	/* XXX: Do we really need to invalidate the texture cache here?
-	 * r600_inval_vertex_cache() will invalidate the texture cache
-	 * if the chip does not have a vertex cache.
-	 */
-	r600_inval_texture_cache(pipe->ctx);
-}
-
 void evergreen_set_tex_resource(
 	struct r600_pipe_compute *pipe,
 	struct r600_pipe_sampler_view* view,
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index 75ada51..5fa9c48 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -107,7 +107,6 @@ void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t s
 void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size);
 void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);
 void evergreen_set_tmp_ring(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size, int se);
-void evergreen_set_vtx_resource(struct r600_pipe_compute *pipe, struct r600_resource* bo, int id, uint64_t offset, int writable);
 void evergreen_set_tex_resource(struct r600_pipe_compute *pipe, struct r600_pipe_sampler_view* view, int id);
 void evergreen_set_sampler_resource(struct r600_pipe_compute *pipe, struct compute_sampler_state *sampler, int id);
 void evergreen_set_const_cache(struct r600_pipe_compute *pipe, int cache_id, struct r600_resource* cbo, int size, int offset);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 404df02..ab78f58 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1765,15 +1765,15 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 	r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
 }
 
-static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
+static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom,
+	struct pipe_vertex_buffer *vb, unsigned vb_count, unsigned resource_offset,
+	unsigned pkt_flags)
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
-	struct pipe_vertex_buffer *vb = rctx->vertex_buffer;
-	unsigned count = rctx->nr_vertex_buffers;
 	unsigned i;
 	uint64_t va;
 
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < vb_count; i++) {
 		struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
 
 		if (!rbuffer) {
@@ -1784,8 +1784,8 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600
 		va += vb[i].buffer_offset;
 
 		/* fetch resources start at index 992 */
-		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
-		r600_write_value(cs, (992 + i) * 8);
+		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
+		r600_write_value(cs, (resource_offset + i) * 8);
 		r600_write_value(cs, va); /* RESOURCEi_WORD0 */
 		r600_write_value(cs, rbuffer->buf->size - vb[i].buffer_offset - 1); /* RESOURCEi_WORD1 */
 		r600_write_value(cs, /* RESOURCEi_WORD2 */
@@ -1802,11 +1802,24 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600
 		r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
 		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
-		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
 		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
 	}
 }
 
+static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
+{
+	evergreen_emit_vertex_buffers(rctx, atom, rctx->vertex_buffer,
+					rctx->nr_vertex_buffers, 992, 0);
+}
+
+static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
+{
+	evergreen_emit_vertex_buffers(rctx, atom, rctx->cs_vertex_buffer,
+					rctx->nr_cs_vertex_buffers, 816,
+					RADEON_CP_PACKET3_COMPUTE_MODE);
+}
+
 static void evergreen_emit_constant_buffer(struct r600_context *rctx,
 					   struct r600_constbuf_state *state,
 				           unsigned buffer_id_base,
@@ -1882,7 +1895,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	r600_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0);
 	r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
-	r600_init_atom(&rctx->vertex_buffer_state, evergreen_emit_vertex_buffers, 0, 0);
+	r600_init_atom(&rctx->vertex_buffer_state, evergreen_fs_emit_vertex_buffers, 0, 0);
+	r600_init_atom(&rctx->cs_vertex_buffer_state, evergreen_cs_emit_vertex_buffers, 0, 0);
 	r600_init_atom(&rctx->vs_constbuf_state.atom, evergreen_emit_vs_constant_buffer, 0, 0);
 	r600_init_atom(&rctx->ps_constbuf_state.atom, evergreen_emit_ps_constant_buffer, 0, 0);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 7843579..ba63dcc 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -336,7 +336,10 @@ struct r600_context {
 	struct r600_atom		r6xx_flush_and_inv_cmd;
 	struct r600_cb_misc_state	cb_misc_state;
 	struct r600_db_misc_state	db_misc_state;
+	/** Vertex buffers for fetch shaders */
 	struct r600_atom		vertex_buffer_state;
+	/** Vertex buffers for compute shaders */
+	struct r600_atom		cs_vertex_buffer_state;
 	struct r600_constbuf_state	vs_constbuf_state;
 	struct r600_constbuf_state	ps_constbuf_state;
 
@@ -396,6 +399,8 @@ struct r600_context {
 	struct pipe_index_buffer index_buffer;
 	struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
 	unsigned		nr_vertex_buffers;
+	struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS];
+	unsigned		nr_cs_vertex_buffers;
 };
 
 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
-- 
1.7.7.6



More information about the mesa-dev mailing list