[Mesa-dev] [PATCH 2/6] r600g: rework state emission of vertex buffers

Marek Olšák maraeo at gmail.com
Mon Apr 2 08:10:54 PDT 2012


This slightly reduces CPU overhead.
The idea is to translate pipe vertex buffers directly into the CS
without using any intermediate representation.

Framerate in Torcs:
  before: 32.2
  after:  34.6
---
 src/gallium/drivers/r600/evergreen_hw_context.c |    4 --
 src/gallium/drivers/r600/evergreen_state.c      |   43 +++++++++++++++++
 src/gallium/drivers/r600/r600.h                 |    1 -
 src/gallium/drivers/r600/r600_hw_context.c      |   14 +-----
 src/gallium/drivers/r600/r600_pipe.h            |    7 ++-
 src/gallium/drivers/r600/r600_state.c           |   35 ++++++++++++++
 src/gallium/drivers/r600/r600_state_common.c    |   56 ++++-------------------
 7 files changed, 93 insertions(+), 67 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 0d2228d..010b5f3 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -718,16 +718,12 @@ int evergreen_context_init(struct r600_context *ctx)
 
 	ctx->num_ps_resources = 176;
 	ctx->num_vs_resources = 160;
-	ctx->num_fs_resources = 16;
 	r = evergreen_resource_range_init(ctx, &ctx->ps_resources, 0, 176, 0x20);
 	if (r)
 		goto out_err;
 	r = evergreen_resource_range_init(ctx, &ctx->vs_resources, 0x1600, 160, 0x20);
 	if (r)
 		goto out_err;
-	r = evergreen_resource_range_init(ctx, &ctx->fs_resources, 0x7C00, 16, 0x20);
-	if (r)
-		goto out_err;
 
 	/* PS loop const */
 	evergreen_loop_const_init(ctx, 0);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 04844e8..00d2d0d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1721,10 +1721,53 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 	r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
 }
 
+static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = rctx->cs;
+	struct pipe_vertex_buffer *vb = rctx->vbuf_mgr->real_vertex_buffer;
+	unsigned count = rctx->vbuf_mgr->nr_real_vertex_buffers;
+	unsigned i;
+	uint64_t va;
+
+	for (i = 0; i < count; i++) {
+		struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+
+		if (!rbuffer) {
+			continue;
+		}
+
+		va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b.b);
+		va += vb[i].buffer_offset;
+
+		/* fetch resources start at index 992 */
+		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+		r600_write_value(cs, (992 + i) * 8);
+		r600_write_value(cs, va); /* RESOURCEi_WORD0 */
+		r600_write_value(cs, rbuffer->buf->size - vb[i].buffer_offset - 1); /* RESOURCEi_WORD1 */
+		r600_write_value(cs, /* RESOURCEi_WORD2 */
+				 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
+				 S_030008_STRIDE(vb[i].stride) |
+				 S_030008_BASE_ADDRESS_HI(va >> 32UL));
+		r600_write_value(cs, /* RESOURCEi_WORD3 */
+				 S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+				 S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+				 S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+				 S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
+		r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
+		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
+
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+	}
+}
+
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
 	r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0);
 	r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+	r600_init_atom(&rctx->vertex_buffer_state, evergreen_emit_vertex_buffers, 0, 0);
 
 	rctx->context.create_blend_state = evergreen_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 60b5694..867d9d5 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -202,7 +202,6 @@ void r600_context_fini(struct r600_context *ctx);
 void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state);
 void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
 void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
-void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void r600_context_flush(struct r600_context *ctx, unsigned flags);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 5489cce..29e4d23 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -660,7 +660,6 @@ void r600_context_fini(struct r600_context *ctx)
 	}
 	r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources);
 	r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources);
-	r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources);
 	free(ctx->blocks);
 }
 
@@ -707,7 +706,6 @@ int r600_setup_block_table(struct r600_context *ctx)
 
 	r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c);
 	r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c);
-	r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c);
 	return 0;
 }
 
@@ -757,16 +755,12 @@ int r600_context_init(struct r600_context *ctx)
 
 	ctx->num_ps_resources = 160;
 	ctx->num_vs_resources = 160;
-	ctx->num_fs_resources = 16;
 	r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c);
 	if (r)
 		goto out_err;
 	r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c);
 	if (r)
 		goto out_err;
-	r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c);
-	if (r)
-		goto out_err;
 
 	/* PS loop const */
 	r600_loop_const_init(ctx, 0);
@@ -977,13 +971,6 @@ void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r6
 	r600_context_pipe_state_set_resource(ctx, state, block);
 }
 
-void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid)
-{
-	struct r600_block *block = ctx->fs_resources.blocks[rid];
-
-	r600_context_pipe_state_set_resource(ctx, state, block);
-}
-
 void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
 {
 	struct r600_range *range;
@@ -1246,6 +1233,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 
 	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
 	r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
+	r600_atom_dirty(ctx, &ctx->vertex_buffer_state);
 
 	if (streamout_suspended) {
 		ctx->streamout_start = TRUE;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 96df79b..6de33cb 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -237,7 +237,6 @@ struct r600_context {
 	struct radeon_winsys		*ws;
 	struct r600_pipe_state		*states[R600_PIPE_NSTATES];
 	struct r600_vertex_element	*vertex_elements;
-	struct r600_pipe_resource_state	fs_resource[PIPE_MAX_ATTRIBS];
 	struct pipe_framebuffer_state	framebuffer;
 	unsigned			cb_target_mask;
 	unsigned			cb_color_control;
@@ -282,6 +281,7 @@ struct r600_context {
 	struct r600_surface_sync_cmd	surface_sync_cmd;
 	struct r600_atom		r6xx_flush_and_inv_cmd;
 	struct r600_db_misc_state	db_misc_state;
+	struct r600_atom		vertex_buffer_state;
 
 	/* Below are variables from the old r600_context.
 	 */
@@ -318,8 +318,7 @@ struct r600_context {
 	boolean                 predicate_drawing;
 	struct r600_range	ps_resources;
 	struct r600_range	vs_resources;
-	struct r600_range	fs_resources;
-	int			num_ps_resources, num_vs_resources, num_fs_resources;
+	int			num_ps_resources, num_vs_resources;
 
 	unsigned		num_so_targets;
 	struct r600_so_target	*so_targets[PIPE_MAX_SO_BUFFERS];
@@ -334,6 +333,8 @@ struct r600_context {
 	/* With rasterizer discard, there doesn't have to be a pixel shader.
 	 * In that case, we bind this one: */
 	void			*dummy_pixel_shader;
+
+	bool			vertex_buffers_dirty;
 };
 
 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 016434a..96df9cb 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1699,10 +1699,45 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 	r600_write_value(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */
 }
 
+static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = rctx->cs;
+	struct pipe_vertex_buffer *vb = rctx->vbuf_mgr->real_vertex_buffer;
+	unsigned count = rctx->vbuf_mgr->nr_real_vertex_buffers;
+	unsigned i, offset;
+
+	for (i = 0; i < count; i++) {
+		struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+
+		if (!rbuffer) {
+			continue;
+		}
+
+		offset = vb[i].buffer_offset;
+
+		/* fetch resources start at index 320 */
+		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
+		r600_write_value(cs, (320 + i) * 7);
+		r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
+		r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		r600_write_value(cs, /* RESOURCEi_WORD2 */
+				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+				 S_038008_STRIDE(vb[i].stride));
+		r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
+
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+	}
+}
+
 void r600_init_state_functions(struct r600_context *rctx)
 {
 	r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0);
 	r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+	r600_init_atom(&rctx->vertex_buffer_state, r600_emit_vertex_buffers, 0, 0);
 
 	rctx->context.create_blend_state = r600_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 42b185c..c906e9f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -396,19 +396,9 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
 			     const struct pipe_vertex_buffer *buffers)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	int i;
-
-	/* Zero states. */
-	for (i = 0; i < count; i++) {
-		if (!buffers[i].buffer) {
-			r600_context_pipe_state_set_fs_resource(rctx, NULL, i);
-		}
-	}
-	for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) {
-		r600_context_pipe_state_set_fs_resource(rctx, NULL, i);
-	}
 
 	u_vbuf_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
+	rctx->vertex_buffers_dirty = true;
 }
 
 void *r600_create_vertex_elements(struct pipe_context *ctx,
@@ -680,39 +670,6 @@ void r600_set_so_targets(struct pipe_context *ctx,
 	rctx->streamout_append_bitmask = append_bitmask;
 }
 
-static void r600_vertex_buffer_update(struct r600_context *rctx)
-{
-	unsigned i, count;
-
-	r600_inval_vertex_cache(rctx);
-
-	count = rctx->vbuf_mgr->nr_real_vertex_buffers;
-
-	for (i = 0 ; i < count; i++) {
-		struct r600_pipe_resource_state *rstate = &rctx->fs_resource[i];
-		struct pipe_vertex_buffer *vb = &rctx->vbuf_mgr->real_vertex_buffer[i];
-
-		if (!vb->buffer) {
-			continue;
-		}
-
-		if (!rstate->id) {
-			if (rctx->chip_class >= EVERGREEN) {
-				evergreen_pipe_init_buffer_resource(rctx, rstate);
-			} else {
-				r600_pipe_init_buffer_resource(rctx, rstate);
-			}
-		}
-
-		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, (struct r600_resource*)vb->buffer, vb->buffer_offset, vb->stride, RADEON_USAGE_READ);
-		} else {
-			r600_pipe_mod_buffer_resource(rstate, (struct r600_resource*)vb->buffer, vb->buffer_offset, vb->stride, RADEON_USAGE_READ);
-		}
-		r600_context_pipe_state_set_fs_resource(rctx, rstate, i);
-	}
-}
-
 static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
@@ -819,8 +776,15 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 
 	r600_update_derived_state(rctx);
 
-	u_vbuf_draw_begin(rctx->vbuf_mgr, &info);
-	r600_vertex_buffer_update(rctx);
+	/* Update vertex buffers. */
+	if ((u_vbuf_draw_begin(rctx->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) ||
+	    rctx->vertex_buffers_dirty) {
+		r600_inval_vertex_cache(rctx);
+		rctx->vertex_buffer_state.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 10) *
+						   rctx->vbuf_mgr->nr_real_vertex_buffers;
+		r600_atom_dirty(rctx, &rctx->vertex_buffer_state);
+		rctx->vertex_buffers_dirty = FALSE;
+	}
 
 	if (info.indexed) {
 		/* Initialize the index buffer struct. */
-- 
1.7.5.4



More information about the mesa-dev mailing list