[Mesa-dev] [PATCH 09/14] radeonsi: assume that rasterizer state is non-NULL in draw_vbo

Marek Olšák maraeo at gmail.com
Fri Jun 1 05:21:17 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_state.c       |   4 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  13 +--
 .../drivers/radeonsi/si_state_shaders.c       | 106 +++++++++---------
 .../drivers/radeonsi/si_state_viewport.c      |  13 +--
 4 files changed, 61 insertions(+), 75 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3d19af28507..8489d0f9778 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1404,21 +1404,21 @@ static void si_emit_db_render_state(struct si_context *sctx)
 
 	db_shader_control = sctx->ps_db_shader_control;
 
 	/* Bug workaround for smoothing (overrasterization) on SI. */
 	if (sctx->chip_class == SI && sctx->smoothing_enabled) {
 		db_shader_control &= C_02880C_Z_ORDER;
 		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
 	}
 
 	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
-	if (!rs || !rs->multisample_enable)
+	if (!rs->multisample_enable)
 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
 	if (sctx->screen->has_rbplus &&
 	    !sctx->screen->rbplus_allowed)
 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
 	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
 			       db_shader_control);
 }
 
@@ -3237,21 +3237,21 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
 			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
 			/* line bug */
 			S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);
 
 		/* The alternative of setting sample locations to 0 would
 		 * require a DB flush to avoid Z errors, see
 		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
 		 */
 		if (has_msaa_sample_loc_bug &&
 		    sctx->framebuffer.nr_samples > 1 &&
-		    rs && !rs->multisample_enable)
+		    !rs->multisample_enable)
 			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
 
 		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
 				       small_prim_filter_cntl);
 	}
 }
 
 static bool si_out_of_order_rasterization(struct si_context *sctx)
 {
 	struct si_state_blend *blend = sctx->queued.named.blend;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 5370587d747..942cb3c7994 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1222,29 +1222,24 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		 */
 		if (unlikely(!info->instance_count))
 			return;
 
 		/* Handle count == 0. */
 		if (unlikely(!info->count &&
 			     (index_size || !info->count_from_stream_output)))
 			return;
 	}
 
-	if (unlikely(!sctx->vs_shader.cso)) {
-		assert(0);
-		return;
-	}
-	if (unlikely(!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard))) {
-		assert(0);
-		return;
-	}
-	if (unlikely(!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES))) {
+	if (unlikely(!sctx->vs_shader.cso ||
+		     !rs ||
+		     (!sctx->ps_shader.cso && !rs->rasterizer_discard) ||
+		     (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)))) {
 		assert(0);
 		return;
 	}
 
 	/* Recompute and re-emit the texture resource states if needed. */
 	dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
 	if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
 		sctx->last_dirty_tex_counter = dirty_tex_counter;
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index aa270ebcb4d..e7610af2fa7 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1411,79 +1411,77 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 			key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
 		}
 
 		/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
 		if (!key->part.ps.epilog.last_cbuf) {
 			key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
 			key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
 			key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
 		}
 
-		if (rs) {
-			bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
-			bool is_line = util_prim_is_lines(sctx->current_rast_prim);
+		bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
+		bool is_line = util_prim_is_lines(sctx->current_rast_prim);
 
-			key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
-			key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
+		key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
+		key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
 
-			if (sctx->queued.named.blend) {
-				key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
-							      rs->multisample_enable;
-			}
+		if (sctx->queued.named.blend) {
+			key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
+							   rs->multisample_enable;
+		}
 
-			key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
-			key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
-							      (is_line && rs->line_smooth)) &&
-							     sctx->framebuffer.nr_samples <= 1;
-			key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
+		key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
+		key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
+							   (is_line && rs->line_smooth)) &&
+							  sctx->framebuffer.nr_samples <= 1;
+		key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
 
-			if (sctx->ps_iter_samples > 1 &&
-			    sel->info.reads_samplemask) {
-				key->part.ps.prolog.samplemask_log_ps_iter =
-					util_logbase2(sctx->ps_iter_samples);
-			}
+		if (sctx->ps_iter_samples > 1 &&
+		    sel->info.reads_samplemask) {
+			key->part.ps.prolog.samplemask_log_ps_iter =
+				util_logbase2(sctx->ps_iter_samples);
+		}
 
-			if (rs->force_persample_interp &&
-			    rs->multisample_enable &&
-			    sctx->framebuffer.nr_samples > 1 &&
-			    sctx->ps_iter_samples > 1) {
-				key->part.ps.prolog.force_persp_sample_interp =
-					sel->info.uses_persp_center ||
-					sel->info.uses_persp_centroid;
-
-				key->part.ps.prolog.force_linear_sample_interp =
-					sel->info.uses_linear_center ||
-					sel->info.uses_linear_centroid;
-			} else if (rs->multisample_enable &&
-				   sctx->framebuffer.nr_samples > 1) {
-				key->part.ps.prolog.bc_optimize_for_persp =
-					sel->info.uses_persp_center &&
-					sel->info.uses_persp_centroid;
-				key->part.ps.prolog.bc_optimize_for_linear =
-					sel->info.uses_linear_center &&
-					sel->info.uses_linear_centroid;
-			} else {
-				/* Make sure SPI doesn't compute more than 1 pair
-				 * of (i,j), which is the optimization here. */
-				key->part.ps.prolog.force_persp_center_interp =
-					sel->info.uses_persp_center +
-					sel->info.uses_persp_centroid +
-					sel->info.uses_persp_sample > 1;
-
-				key->part.ps.prolog.force_linear_center_interp =
-					sel->info.uses_linear_center +
-					sel->info.uses_linear_centroid +
-					sel->info.uses_linear_sample > 1;
-
-				if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE])
-					key->mono.u.ps.interpolate_at_sample_force_center = 1;
-			}
+		if (rs->force_persample_interp &&
+		    rs->multisample_enable &&
+		    sctx->framebuffer.nr_samples > 1 &&
+		    sctx->ps_iter_samples > 1) {
+			key->part.ps.prolog.force_persp_sample_interp =
+				sel->info.uses_persp_center ||
+				sel->info.uses_persp_centroid;
+
+			key->part.ps.prolog.force_linear_sample_interp =
+				sel->info.uses_linear_center ||
+				sel->info.uses_linear_centroid;
+		} else if (rs->multisample_enable &&
+			   sctx->framebuffer.nr_samples > 1) {
+			key->part.ps.prolog.bc_optimize_for_persp =
+				sel->info.uses_persp_center &&
+				sel->info.uses_persp_centroid;
+			key->part.ps.prolog.bc_optimize_for_linear =
+				sel->info.uses_linear_center &&
+				sel->info.uses_linear_centroid;
+		} else {
+			/* Make sure SPI doesn't compute more than 1 pair
+			 * of (i,j), which is the optimization here. */
+			key->part.ps.prolog.force_persp_center_interp =
+				sel->info.uses_persp_center +
+				sel->info.uses_persp_centroid +
+				sel->info.uses_persp_sample > 1;
+
+			key->part.ps.prolog.force_linear_center_interp =
+				sel->info.uses_linear_center +
+				sel->info.uses_linear_centroid +
+				sel->info.uses_linear_sample > 1;
+
+			if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE])
+				key->mono.u.ps.interpolate_at_sample_force_center = 1;
 		}
 
 		key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
 
 		/* ps_uses_fbfetch is true only if the color buffer is bound. */
 		if (sctx->ps_uses_fbfetch) {
 			struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
 			struct pipe_resource *tex = cb0->texture;
 
 			/* 1D textures are allocated and used as 2D on GFX9. */
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index d16c3e7e41b..d0287d5ad75 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -186,22 +186,21 @@ static void si_emit_guardband(struct si_context *ctx)
 	bottom = ( max_range - vp.translate[1]) / vp.scale[1];
 
 	assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
 
 	guardband_x = MIN2(-left, right);
 	guardband_y = MIN2(-top, bottom);
 
 	discard_x = 1.0;
 	discard_y = 1.0;
 
-	if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim)) &&
-	    ctx->queued.named.rasterizer) {
+	if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim))) {
 		/* When rendering wide points or lines, we need to be more
 		 * conservative about when to discard them entirely. */
 		const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
 		float pixels;
 
 		if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
 			pixels = rs->max_point_size;
 		else
 			pixels = rs->line_width;
 
@@ -222,24 +221,21 @@ static void si_emit_guardband(struct si_context *ctx)
 	radeon_emit(cs, fui(discard_y));   /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
 	radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
 	radeon_emit(cs, fui(discard_x));   /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
 }
 
 static void si_emit_scissors(struct si_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_scissor_state *states = ctx->scissors.states;
 	unsigned mask = ctx->scissors.dirty_mask;
-	bool scissor_enabled = false;
-
-	if (ctx->queued.named.rasterizer)
-		scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
+	bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
 
 	/* The simple case: Only 1 viewport is active. */
 	if (!ctx->vs_writes_viewport_index) {
 		struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
 
 		if (!(mask & 1))
 			return;
 
 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
 		si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
@@ -341,27 +337,24 @@ si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
 		return;
 	}
 	util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
 }
 
 static void si_emit_depth_ranges(struct si_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_viewport_state *states = ctx->viewports.states;
 	unsigned mask = ctx->viewports.depth_range_dirty_mask;
-	bool clip_halfz = false;
+	bool clip_halfz = ctx->queued.named.rasterizer->clip_halfz;
 	bool window_space = ctx->vs_disables_clipping_viewport;
 	float zmin, zmax;
 
-	if (ctx->queued.named.rasterizer)
-		clip_halfz = ctx->queued.named.rasterizer->clip_halfz;
-
 	/* The simple case: Only 1 viewport is active. */
 	if (!ctx->vs_writes_viewport_index) {
 		if (!(mask & 1))
 			return;
 
 		si_viewport_zmin_zmax(&states[0], clip_halfz, window_space,
 				      &zmin, &zmax);
 
 		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
 		radeon_emit(cs, fui(zmin));
-- 
2.17.0



More information about the mesa-dev mailing list