[Mesa-dev] [PATCH 07/14] radeonsi: move the guardband registers into a separate state atom

Marek Olšák maraeo at gmail.com
Fri Jun 1 05:21:15 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

The guardband registers are updated at a different frequency than the
scissor registers and don't change when the scissors change.

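I think this even fixes something in si_update_vs_viewport_state: the
guardband is now re-emitted whenever vs_writes_viewport_index changes.
Previously it was only emitted together with the scissors, and that
function did not always mark the scissors atom dirty on such a change.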
---
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |  1 +
 src/gallium/drivers/radeonsi/si_state.c       |  9 +++--
 src/gallium/drivers/radeonsi/si_state.h       |  2 ++
 src/gallium/drivers/radeonsi/si_state_draw.c  |  6 ++--
 .../drivers/radeonsi/si_state_viewport.c      | 36 ++++++++++++-------
 5 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 84536b7c6f6..c7ec83789b1 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -277,20 +277,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
 	si_all_descriptors_begin_new_cs(ctx);
 	si_all_resident_buffers_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
 	if (ctx->scratch_buffer) {
 		si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b);
 	}
 
 	if (ctx->streamout.suspended) {
 		ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3a7e928df53..3d19af28507 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -995,27 +995,30 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 			si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
 	}
 
 	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
 	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
 
 	si_pm4_bind_state(sctx, rasterizer, rs);
 	si_update_poly_offset_state(sctx);
 
 	if (!old_rs ||
-	    (old_rs->scissor_enable != rs->scissor_enable ||
-	     old_rs->line_width != rs->line_width ||
-	     old_rs->max_point_size != rs->max_point_size)) {
+	    old_rs->scissor_enable != rs->scissor_enable) {
 		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
 	}
 
+	if (!old_rs ||
+	    old_rs->line_width != rs->line_width ||
+	    old_rs->max_point_size != rs->max_point_size)
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
+
 	if (!old_rs ||
 	    old_rs->clip_halfz != rs->clip_halfz) {
 		sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);
 	}
 
 	if (!old_rs ||
 	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
 	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 9da58ac9710..3cfcf75a22f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -200,20 +200,21 @@ union si_state_atoms {
 		struct si_atom msaa_sample_locs;
 		struct si_atom db_render_state;
 		struct si_atom dpbb_state;
 		struct si_atom msaa_config;
 		struct si_atom sample_mask;
 		struct si_atom cb_render_state;
 		struct si_atom blend_color;
 		struct si_atom clip_regs;
 		struct si_atom clip_state;
 		struct si_atom shader_pointers;
+		struct si_atom guardband;
 		struct si_atom scissors;
 		struct si_atom viewports;
 		struct si_atom stencil_ref;
 		struct si_atom spi_map;
 		struct si_atom scratch_state;
 	} s;
 	struct si_atom array[0];
 };
 
 #define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / \
@@ -227,20 +228,21 @@ static inline unsigned si_atoms_that_roll_context(void)
 		SI_ATOM_BIT(framebuffer) |
 		SI_ATOM_BIT(msaa_sample_locs) |
 		SI_ATOM_BIT(db_render_state) |
 		SI_ATOM_BIT(dpbb_state) |
 		SI_ATOM_BIT(msaa_config) |
 		SI_ATOM_BIT(sample_mask) |
 		SI_ATOM_BIT(cb_render_state) |
 		SI_ATOM_BIT(blend_color) |
 		SI_ATOM_BIT(clip_regs) |
 		SI_ATOM_BIT(clip_state) |
+		SI_ATOM_BIT(guardband) |
 		SI_ATOM_BIT(scissors) |
 		SI_ATOM_BIT(viewports) |
 		SI_ATOM_BIT(stencil_ref) |
 		SI_ATOM_BIT(spi_map) |
 		SI_ATOM_BIT(scratch_state));
 }
 
 struct si_shader_data {
 	uint32_t		sh_base[SI_NUM_SHADERS];
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 5588c9a2c53..e33e235620a 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1270,24 +1270,22 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		if (sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_POINT_MODE])
 			rast_prim = PIPE_PRIM_POINTS;
 		else
 			rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 	} else
 		rast_prim = info->mode;
 
 	if (rast_prim != sctx->current_rast_prim) {
 		bool old_is_poly = sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES;
 		bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES;
-		if (old_is_poly != new_is_poly) {
-			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-			si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
-		}
+		if (old_is_poly != new_is_poly)
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
 
 		sctx->current_rast_prim = rast_prim;
 		sctx->do_update_shaders = true;
 	}
 
 	if (sctx->tes_shader.cso &&
 	    sctx->screen->has_ls_vgpr_init_bug) {
 		/* Determine whether the LS VGPR fix should be applied.
 		 *
 		 * It is only required when num input CPs > num output CPs,
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index bffb1f91827..97b1b89b48b 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -129,28 +129,42 @@ static void si_emit_one_scissor(struct si_context *ctx,
 	radeon_emit(cs, S_028250_TL_X(final.minx) |
 			S_028250_TL_Y(final.miny) |
 			S_028250_WINDOW_OFFSET_DISABLE(1));
 	radeon_emit(cs, S_028254_BR_X(final.maxx) |
 			S_028254_BR_Y(final.maxy));
 }
 
 /* the range is [-MAX, MAX] */
 #define SI_MAX_VIEWPORT_RANGE 32768
 
-static void si_emit_guardband(struct si_context *ctx,
-			      struct si_signed_scissor *vp_as_scissor)
+static void si_emit_guardband(struct si_context *ctx)
 {
+	const struct si_signed_scissor *vp_as_scissor;
+	struct si_signed_scissor max_vp_scissor;
 	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_viewport_state vp;
 	float left, top, right, bottom, max_range, guardband_x, guardband_y;
 	float discard_x, discard_y;
 
+	if (ctx->vs_writes_viewport_index) {
+		/* Shaders can draw to any viewport. Make a union of all
+		 * viewports. */
+		max_vp_scissor = ctx->viewports.as_scissor[0];
+		for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
+			si_scissor_make_union(&max_vp_scissor,
+					      &ctx->viewports.as_scissor[i]);
+		}
+		vp_as_scissor = &max_vp_scissor;
+	} else {
+		vp_as_scissor = &ctx->viewports.as_scissor[0];
+	}
+
 	/* Reconstruct the viewport transformation from the scissor. */
 	vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
 	vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
 	vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
 	vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
 
 	/* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
 	if (vp_as_scissor->minx == vp_as_scissor->maxx)
 		vp.scale[0] = 0.5;
 	if (vp_as_scissor->miny == vp_as_scissor->maxy)
@@ -209,59 +223,49 @@ static void si_emit_guardband(struct si_context *ctx,
 	radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
 	radeon_emit(cs, fui(discard_x));   /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
 }
 
 static void si_emit_scissors(struct si_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_scissor_state *states = ctx->scissors.states;
 	unsigned mask = ctx->scissors.dirty_mask;
 	bool scissor_enabled = false;
-	struct si_signed_scissor max_vp_scissor;
-	int i;
 
 	if (ctx->queued.named.rasterizer)
 		scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
 
 	/* The simple case: Only 1 viewport is active. */
 	if (!ctx->vs_writes_viewport_index) {
 		struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
 
 		if (!(mask & 1))
 			return;
 
 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
 		si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
-		si_emit_guardband(ctx, vp);
 		ctx->scissors.dirty_mask &= ~1; /* clear one bit */
 		return;
 	}
 
-	/* Shaders can draw to any viewport. Make a union of all viewports. */
-	max_vp_scissor = ctx->viewports.as_scissor[0];
-	for (i = 1; i < SI_MAX_VIEWPORTS; i++)
-		si_scissor_make_union(&max_vp_scissor,
-				      &ctx->viewports.as_scissor[i]);
-
 	while (mask) {
 		int start, count, i;
 
 		u_bit_scan_consecutive_range(&mask, &start, &count);
 
 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
 					       start * 4 * 2, count * 2);
 		for (i = start; i < start+count; i++) {
 			si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
 					    scissor_enabled ? &states[i] : NULL);
 		}
 	}
-	si_emit_guardband(ctx, &max_vp_scissor);
 	ctx->scissors.dirty_mask = 0;
 }
 
 static void si_set_viewport_states(struct pipe_context *pctx,
 				   unsigned start_slot,
 				   unsigned num_viewports,
 				   const struct pipe_viewport_state *state)
 {
 	struct si_context *ctx = (struct si_context *)pctx;
 	unsigned mask;
@@ -273,20 +277,21 @@ static void si_set_viewport_states(struct pipe_context *pctx,
 		ctx->viewports.states[index] = state[i];
 		si_get_scissor_from_viewport(ctx, &state[i],
 					     &ctx->viewports.as_scissor[index]);
 	}
 
 	mask = ((1 << num_viewports) - 1) << start_slot;
 	ctx->viewports.dirty_mask |= mask;
 	ctx->viewports.depth_range_dirty_mask |= mask;
 	ctx->scissors.dirty_mask |= mask;
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 }
 
 static void si_emit_one_viewport(struct si_context *ctx,
 				 struct pipe_viewport_state *state)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 
 	radeon_emit(cs, fui(state->scale[0]));
 	radeon_emit(cs, fui(state->translate[0]));
@@ -412,30 +417,37 @@ void si_update_vs_viewport_state(struct si_context *ctx)
 
 	if (ctx->vs_disables_clipping_viewport != vs_window_space) {
 		ctx->vs_disables_clipping_viewport = vs_window_space;
 		ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 		ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 	}
 
 	/* Viewport index handling. */
+	if (ctx->vs_writes_viewport_index == info->writes_viewport_index)
+		return;
+
+	/* This changes how the guardband is computed. */
 	ctx->vs_writes_viewport_index = info->writes_viewport_index;
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
+
 	if (!ctx->vs_writes_viewport_index)
 		return;
 
 	if (ctx->scissors.dirty_mask)
 	    si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 
 	if (ctx->viewports.dirty_mask ||
 	    ctx->viewports.depth_range_dirty_mask)
 	    si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 }
 
 void si_init_viewport_functions(struct si_context *ctx)
 {
+	ctx->atoms.s.guardband.emit = si_emit_guardband;
 	ctx->atoms.s.scissors.emit = si_emit_scissors;
 	ctx->atoms.s.viewports.emit = si_emit_viewport_states;
 
 	ctx->b.set_scissor_states = si_set_scissor_states;
 	ctx->b.set_viewport_states = si_set_viewport_states;
 }
-- 
2.17.0



More information about the mesa-dev mailing list