Mesa (master): r600g: atomize pixel shader

Marek Olšák mareko at kemper.freedesktop.org
Mon Mar 11 12:51:46 UTC 2013


Module: Mesa
Branch: master
Commit: 65cbf895670d2afb44d320fcc9d607f3c6c582ef
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=65cbf895670d2afb44d320fcc9d607f3c6c582ef

Author: Marek Olšák <maraeo at gmail.com>
Date:   Sat Mar  2 17:14:51 2013 +0100

r600g: atomize pixel shader

Reviewed-by: Jerome Glisse <jglisse at redhat.com>

---

 src/gallium/drivers/r600/evergreen_hw_context.c |   96 -----------------------
 src/gallium/drivers/r600/evergreen_state.c      |   69 ++++++++--------
 src/gallium/drivers/r600/evergreend.h           |    1 +
 src/gallium/drivers/r600/r600_hw_context.c      |   50 +------------
 src/gallium/drivers/r600/r600_pipe.h            |    9 ++-
 src/gallium/drivers/r600/r600_state.c           |   45 ++++++-----
 src/gallium/drivers/r600/r600_state_common.c    |   20 +++--
 7 files changed, 83 insertions(+), 207 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 730e51f..a3528fc 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -28,107 +28,11 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
-static const struct r600_reg evergreen_context_reg_list[] = {
-	{R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
-	{R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
-	{R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
-	{R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
-	{R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
-	{R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
-	{R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
-	{R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
-	{R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
-	{R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
-	{R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
-	{R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
-	{R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
-	{R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
-	{R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
-	{R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
-	{R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
-	{R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
-	{R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
-	{R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
-	{R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
-	{R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
-	{R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
-	{R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
-	{R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
-	{R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
-	{R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
-	{R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
-	{R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
-	{R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
-	{R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
-	{R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
-	{GROUP_FORCE_NEW_BLOCK, 0, 0},
-	{R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
-	{R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
-	{R_0286D8_SPI_INPUT_Z, 0, 0},
-	{R_0286E0_SPI_BARYC_CNTL, 0, 0},
-	{R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0},
-	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
-	{R_028844_SQ_PGM_RESOURCES_PS, 0, 0},
-	{R_02884C_SQ_PGM_EXPORTS_PS, 0, 0},
-};
-
-static const struct r600_reg cayman_context_reg_list[] = {
-	{R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
-	{R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
-	{R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
-	{R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
-	{R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
-	{R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
-	{R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
-	{R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
-	{R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
-	{R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
-	{R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
-	{R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
-	{R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
-	{R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
-	{R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
-	{R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
-	{R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
-	{R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
-	{R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
-	{R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
-	{R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
-	{R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
-	{R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
-	{R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
-	{R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
-	{R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
-	{R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
-	{R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
-	{R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
-	{R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
-	{R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
-	{R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
-	{R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
-	{R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
-	{R_0286D8_SPI_INPUT_Z, 0, 0},
-	{R_0286E0_SPI_BARYC_CNTL, 0, 0},
-	{R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0},
-	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
-	{R_028844_SQ_PGM_RESOURCES_PS, 0, 0},
-	{R_02884C_SQ_PGM_EXPORTS_PS, 0, 0},
-};
-
 int evergreen_context_init(struct r600_context *ctx)
 {
 	int r = 0;
 
 	/* add blocks */
-	if (ctx->family >= CHIP_CAYMAN)
-		r = r600_context_add_block(ctx, cayman_context_reg_list,
-					   Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
-	else
-		r = r600_context_add_block(ctx, evergreen_context_reg_list,
-					   Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
-	if (r)
-		goto out_err;
-
 	r = r600_setup_block_table(ctx);
 	if (r)
 		goto out_err;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c52e4c8..2bdefb0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2778,7 +2778,9 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
 	r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
 	r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
-	r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0);
+	r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
+	r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
+	r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
 	r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0);
 	r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
 
@@ -3234,7 +3236,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
 	r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
 	r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
-	r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0);
+	r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
+	r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
+	r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
 	r600_store_context_reg(cb, R_0288EC_SQ_LDS_ALLOC_PS, 0);
 	r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0);
 
@@ -3245,17 +3249,22 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_pipe_state *rstate = &shader->rstate;
+	struct r600_command_buffer *cb = &shader->command_buffer;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
 	int pos_index = -1, face_index = -1;
 	int ninterp = 0;
 	boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
-	unsigned spi_baryc_cntl, sid, tmp, idx = 0;
+	unsigned spi_baryc_cntl, sid, tmp, num = 0;
 	unsigned z_export = 0, stencil_export = 0;
 	unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
+	uint32_t spi_ps_input_cntl[32];
 
-	rstate->nregs = 0;
+	if (!cb->buf) {
+		r600_init_command_buffer(cb, 64);
+	} else {
+		cb->num_dw = 0;
+	}
 
 	for (i = 0; i < rshader->ninput; i++) {
 		/* evergreen NUM_INTERP only contains values interpolated into the LDS,
@@ -3277,7 +3286,6 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 		sid = rshader->input[i].spi_sid;
 
 		if (sid) {
-
 			tmp = S_028644_SEMANTIC(sid);
 
 			if (rshader->input[i].name == TGSI_SEMANTIC_POSITION ||
@@ -3292,13 +3300,13 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 				tmp |= S_028644_PT_SPRITE_TEX(1);
 			}
 
-			r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + idx * 4,
-					tmp);
-
-			idx++;
+			spi_ps_input_cntl[num++] = tmp;
 		}
 	}
 
+	r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num);
+	r600_store_array(cb, num, spi_ps_input_cntl);
+
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
 			z_export = 1;
@@ -3342,7 +3350,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 		spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
 			S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
 			S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
-		spi_input_z |= 1;
+		spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
 	}
 
 	spi_ps_in_control_1 = 0;
@@ -3359,29 +3367,21 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 		spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
 				  S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
 
-	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
-				spi_ps_in_control_0);
-	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
-				spi_ps_in_control_1);
-	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
-				0);
-	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z);
-	r600_pipe_state_add_reg(rstate,
-				R_0286E0_SPI_BARYC_CNTL,
-				spi_baryc_cntl);
-
-	r600_pipe_state_add_reg_bo(rstate,
-				R_028840_SQ_PGM_START_PS,
-				r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
-				shader->bo, RADEON_USAGE_READ);
-	r600_pipe_state_add_reg(rstate,
-				R_028844_SQ_PGM_RESOURCES_PS,
-				S_028844_NUM_GPRS(rshader->bc.ngpr) |
-				S_028844_PRIME_CACHE_ON_DRAW(1) |
-				S_028844_STACK_SIZE(rshader->bc.nstack));
-	r600_pipe_state_add_reg(rstate,
-				R_02884C_SQ_PGM_EXPORTS_PS,
-				exports_ps);
+	r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
+	r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
+	r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */
+
+	r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
+	r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z);
+	r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps);
+
+	r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2);
+	r600_store_value(cb, r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+	r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
+			 S_028844_NUM_GPRS(rshader->bc.ngpr) |
+			 S_028844_PRIME_CACHE_ON_DRAW(1) |
+			 S_028844_STACK_SIZE(rshader->bc.nstack));
+	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
 
 	shader->db_shader_control = db_shader_control;
 	shader->ps_depth_export = z_export | stencil_export;
@@ -3755,6 +3755,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
 	r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
 	r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
+	r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
 
 	rctx->context.create_blend_state = evergreen_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 12c7ed1..53b68a4 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1798,6 +1798,7 @@
 #define R_0286C0_SPI_PS_INPUT_CNTL_31                0x000286C0
 #define R_0286C8_SPI_THREAD_GROUPING                 0x000286C8
 #define R_0286D8_SPI_INPUT_Z                         0x000286D8
+#define   S_0286D8_PROVIDE_Z_TO_SPI(x)			(((x) & 0x1) << 0)
 #define R_0286DC_SPI_FOG_CNTL                        0x000286DC
 #define R_0286E4_SPI_PS_IN_CONTROL_2                 0x000286E4
 #define R_0286E8_SPI_COMPUTE_INPUT_CNTL              0x000286E8
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index a2eefa8..dda38e3 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -215,49 +215,6 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
 	return 0;
 }
 
-static const struct r600_reg r600_context_reg_list[] = {
-	{R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
-	{R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
-	{R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
-	{R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
-	{R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
-	{R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
-	{R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
-	{R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
-	{R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
-	{R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
-	{R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
-	{R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
-	{R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
-	{R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
-	{R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
-	{R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
-	{R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
-	{R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
-	{R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
-	{R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
-	{R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
-	{R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
-	{R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
-	{R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
-	{R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
-	{R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
-	{R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
-	{R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
-	{R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
-	{R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
-	{R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
-	{R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
-	{R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
-	{R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
-	{R_0286D8_SPI_INPUT_Z, 0, 0},
-	{GROUP_FORCE_NEW_BLOCK, 0, 0},
-	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
-	{GROUP_FORCE_NEW_BLOCK, 0, 0},
-	{R_028850_SQ_PGM_RESOURCES_PS, 0, 0},
-	{R_028854_SQ_PGM_EXPORTS_PS, 0, 0},
-};
-
 /* initialize */
 void r600_context_fini(struct r600_context *ctx)
 {
@@ -322,12 +279,6 @@ int r600_context_init(struct r600_context *ctx)
 {
 	int r;
 
-	/* add blocks */
-	r = r600_context_add_block(ctx, r600_context_reg_list,
-				   Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET);
-	if (r)
-		goto out_err;
-
 	r = r600_setup_block_table(ctx);
 	if (r)
 		goto out_err;
@@ -806,6 +757,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->db_misc_state.atom.dirty = true;
 	ctx->db_state.atom.dirty = true;
 	ctx->framebuffer.atom.dirty = true;
+	ctx->pixel_shader.atom.dirty = true;
 	ctx->poly_offset_state.atom.dirty = true;
 	ctx->vgt_state.atom.dirty = true;
 	ctx->sample_mask.atom.dirty = true;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 813012f..ba0aaad 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -34,7 +34,7 @@
 #include "r600_public.h"
 #include "r600_resource.h"
 
-#define R600_NUM_ATOMS 39
+#define R600_NUM_ATOMS 40
 
 #define R600_TRACE_CS 0
 
@@ -805,6 +805,13 @@ static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned val
 	cb->buf[cb->num_dw++] = value;
 }
 
+static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
+{
+	assert(cb->num_dw+num <= cb->max_num_dw);
+	memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
+	cb->num_dw += num;
+}
+
 static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
 	assert(reg < R600_CONTEXT_REG_OFFSET);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index e8d7c84..846c159 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2690,7 +2690,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
 void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_pipe_state *rstate = &shader->rstate;
+	struct r600_command_buffer *cb = &shader->command_buffer;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
 	int pos_index = -1, face_index = -1;
@@ -2699,8 +2699,13 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	unsigned z_export = 0, stencil_export = 0;
 	unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
 
-	rstate->nregs = 0;
+	if (!cb->buf) {
+		r600_init_command_buffer(cb, 64);
+	} else {
+		cb->num_dw = 0;
+	}
 
+	r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, rshader->ninput);
 	for (i = 0; i < rshader->ninput; i++) {
 		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
 			pos_index = i;
@@ -2730,8 +2735,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 			tmp |= S_028644_SEL_LINEAR(1);
 		}
 
-		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4,
-				tmp);
+		r600_store_value(cb, tmp);
 	}
 
 	db_shader_control = 0;
@@ -2771,7 +2775,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
 					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
 					S_0286CC_BARYC_SAMPLE_CNTL(1));
-		spi_input_z |= 1;
+		spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
 	}
 
 	spi_ps_in_control_1 = 0;
@@ -2784,20 +2788,22 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	if (rctx->family == CHIP_R600)
 		ufi = 1;
 
-	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0);
-	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1);
-	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z);
-	r600_pipe_state_add_reg_bo(rstate,
-				   R_028840_SQ_PGM_START_PS,
-				   0, shader->bo, RADEON_USAGE_READ);
-	r600_pipe_state_add_reg(rstate,
-				R_028850_SQ_PGM_RESOURCES_PS,
-				S_028850_NUM_GPRS(rshader->bc.ngpr) |
-				S_028850_STACK_SIZE(rshader->bc.nstack) |
-				S_028850_UNCACHED_FIRST_INST(ufi));
-	r600_pipe_state_add_reg(rstate,
-				R_028854_SQ_PGM_EXPORTS_PS,
-				exports_ps);
+	r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
+	r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
+	r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */
+
+	r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z);
+
+	r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
+	r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
+			 S_028850_NUM_GPRS(rshader->bc.ngpr) |
+			 S_028850_STACK_SIZE(rshader->bc.nstack) |
+			 S_028850_UNCACHED_FIRST_INST(ufi));
+	r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
+
+	r600_store_context_reg(cb, R_028840_SQ_PGM_START_PS, 0);
+	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
+
 	/* only set some bits here, the other bits are set in the dsa state */
 	shader->db_shader_control = db_shader_control;
 	shader->ps_depth_export = z_export | stencil_export;
@@ -3192,6 +3198,7 @@ void r600_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
 	r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
 	r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
+	r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
 
 	rctx->context.create_blend_state = r600_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 842d0d4..89eb5e3 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -39,6 +39,7 @@
 
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
 {
+	assert(!cb->buf);
 	cb->buf = CALLOC(1, 4 * num_dw);
 	cb->max_num_dw = num_dw;
 }
@@ -707,7 +708,7 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex
  * (*dirty) is set to 1 if current variant was changed */
 static int r600_shader_select(struct pipe_context *ctx,
         struct r600_pipe_shader_selector* sel,
-        unsigned *dirty)
+        bool *dirty)
 {
 	struct r600_shader_key key;
 	struct r600_context *rctx = (struct r600_context *)ctx;
@@ -766,7 +767,7 @@ static int r600_shader_select(struct pipe_context *ctx,
 	}
 
 	if (dirty)
-		*dirty = 1;
+		*dirty = true;
 
 	shader->next_variant = sel->current;
 	sel->current = shader;
@@ -816,8 +817,9 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
 	if (!state)
 		state = rctx->dummy_pixel_shader;
 
-	rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
-	r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+	rctx->pixel_shader.shader = rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
+	rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw;
+	rctx->pixel_shader.atom.dirty = true;
 
 	r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
 
@@ -1198,7 +1200,7 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
 static bool r600_update_derived_state(struct r600_context *rctx)
 {
 	struct pipe_context * ctx = (struct pipe_context*)rctx;
-	unsigned ps_dirty = 0;
+	bool ps_dirty = false;
 	bool blend_disable;
 
 	if (!rctx->blitter->running) {
@@ -1227,11 +1229,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		else
 			r600_update_ps_state(ctx, rctx->ps_shader->current);
 
-		ps_dirty = 1;
+		ps_dirty = true;
 	}
 
-	if (ps_dirty)
-		r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+	if (ps_dirty) {
+		rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw;
+		rctx->pixel_shader.atom.dirty = true;
+	}
 
 	/* on R600 we stuff masks + txq info into one constant buffer */
 	/* on evergreen we only need a txq info one */




More information about the mesa-commit mailing list