[Mesa-dev] [PATCH 4/4] radeonsi: implement EXT_window_rectangles

Marek Olšák maraeo at gmail.com
Thu Aug 9 01:29:45 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 docs/relnotes/18.3.0.html                     |  1 +
 src/amd/common/sid.h                          | 16 ++++
 src/gallium/drivers/radeonsi/si_blit.c        |  4 +
 src/gallium/drivers/radeonsi/si_get.c         |  2 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |  4 +
 src/gallium/drivers/radeonsi/si_pipe.h        |  3 +
 src/gallium/drivers/radeonsi/si_state.c       |  1 -
 src/gallium/drivers/radeonsi/si_state.h       |  3 +
 .../drivers/radeonsi/si_state_viewport.c      | 80 +++++++++++++++++++
 9 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index 786145ad8da..ac2cc1e893b 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -45,20 +45,21 @@ TBD.
 
 
 <h2>New features</h2>
 
 <p>
 Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
 <li>GL_AMD_framebuffer_multisample_advanced on radeonsi.</li>
+<li>GL_EXT_window_rectangles on radeonsi.</li>
 </ul>
 
 <h2>Bug fixes</h2>
 
 <ul>
 <li>TBD</li>
 </ul>
 
 <h2>Changes</h2>
 
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 3c3bc541b4f..0671f7d3998 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -5275,20 +5275,36 @@
 #define   S_028208_BR_X(x)                                            (((unsigned)(x) & 0x7FFF) << 0)
 #define   G_028208_BR_X(x)                                            (((x) >> 0) & 0x7FFF)
 #define   C_028208_BR_X                                               0xFFFF8000
 #define   S_028208_BR_Y(x)                                            (((unsigned)(x) & 0x7FFF) << 16)
 #define   G_028208_BR_Y(x)                                            (((x) >> 16) & 0x7FFF)
 #define   C_028208_BR_Y                                               0x8000FFFF
 #define R_02820C_PA_SC_CLIPRECT_RULE                                    0x02820C
 #define   S_02820C_CLIP_RULE(x)                                       (((unsigned)(x) & 0xFFFF) << 0)
 #define   G_02820C_CLIP_RULE(x)                                       (((x) >> 0) & 0xFFFF)
 #define   C_02820C_CLIP_RULE                                          0xFFFF0000
+#define     V_02820C_OUT                                            0x0001
+#define     V_02820C_IN_0                                           0x0002
+#define     V_02820C_IN_1                                           0x0004
+#define     V_02820C_IN_10                                          0x0008
+#define     V_02820C_IN_2                                           0x0010
+#define     V_02820C_IN_20                                          0x0020
+#define     V_02820C_IN_21                                          0x0040
+#define     V_02820C_IN_210                                         0x0080
+#define     V_02820C_IN_3                                           0x0100
+#define     V_02820C_IN_30                                          0x0200
+#define     V_02820C_IN_31                                          0x0400
+#define     V_02820C_IN_310                                         0x0800
+#define     V_02820C_IN_32                                          0x1000
+#define     V_02820C_IN_320                                         0x2000
+#define     V_02820C_IN_321                                         0x4000
+#define     V_02820C_IN_3210                                        0x8000
 #define R_028210_PA_SC_CLIPRECT_0_TL                                    0x028210
 #define   S_028210_TL_X(x)                                            (((unsigned)(x) & 0x7FFF) << 0)
 #define   G_028210_TL_X(x)                                            (((x) >> 0) & 0x7FFF)
 #define   C_028210_TL_X                                               0xFFFF8000
 #define   S_028210_TL_Y(x)                                            (((unsigned)(x) & 0x7FFF) << 16)
 #define   G_028210_TL_Y(x)                                            (((x) >> 16) & 0x7FFF)
 #define   C_028210_TL_Y                                               0x8000FFFF
 #define R_028214_PA_SC_CLIPRECT_0_BR                                    0x028214
 #define   S_028214_BR_X(x)                                            (((unsigned)(x) & 0x7FFF) << 0)
 #define   G_028214_BR_X(x)                                            (((x) >> 0) & 0x7FFF)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 0fd69f3a6c5..cf6495291bd 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -52,20 +52,24 @@ void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
 				     (struct pipe_stream_output_target**)sctx->streamout.targets);
 	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
 
 	if (op & SI_SAVE_FRAGMENT_STATE) {
 		util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
 		util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
 		util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
 		util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
 		util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask);
 		util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
+		util_blitter_save_window_rectangles(sctx->blitter,
+						    sctx->window_rectangles_include,
+						    sctx->num_window_rectangles,
+						    sctx->window_rectangles);
 	}
 
 	if (op & SI_SAVE_FRAMEBUFFER)
 		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);
 
 	if (op & SI_SAVE_TEXTURES) {
 		util_blitter_save_fragment_sampler_states(
 			sctx->blitter, 2,
 			(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states);
 
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 6e804797bab..e316703bbe3 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -201,20 +201,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 
         case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
                 return SI_MAP_BUFFER_ALIGNMENT;
 
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
 	case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
 	case PIPE_CAP_MAX_VERTEX_STREAMS:
 	case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
 		return 4;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
 	case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
 		if (sscreen->info.has_indirect_compute_dispatch)
 			return param == PIPE_CAP_GLSL_FEATURE_LEVEL ?
 				450 : 440;
 		return 420;
 
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
@@ -236,21 +237,20 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 
 	/* Unsupported features. */
 	case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
 	case PIPE_CAP_TGSI_MUL_ZERO_WINS:
 	case PIPE_CAP_UMA:
 	case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
 	case PIPE_CAP_POST_DEPTH_COVERAGE:
 	case PIPE_CAP_TILE_RASTER_ORDER:
 	case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
 	case PIPE_CAP_CONTEXT_PRIORITY_MASK:
 	case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
 	case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
 	case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 9dbd4c64f2a..c39564ecbe5 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -271,20 +271,23 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 	/* CLEAR_STATE sets zeros. */
 	if (!has_clear_state || ctx->blend_color.any_nonzeros)
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
 	if (ctx->chip_class >= GFX9)
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
+	/* CLEAR_STATE disables all window rectangles. */
+	if (!has_clear_state || ctx->num_window_rectangles > 0)
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
 	si_all_descriptors_begin_new_cs(ctx);
 	si_all_resident_buffers_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 
@@ -339,20 +342,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL]	= 0x00090000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL]	= 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]	= 0x3f800000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]	= 0xffff;
 
 		/* Set all saved registers state to saved. */
 		ctx->tracked_regs.reg_saved = 0xffffffff;
 	} else {
 		/* Set all saved registers state to unknown. */
 		ctx->tracked_regs.reg_saved = 0;
 	}
 
 	/* 0xffffffff is a impossible value to register SPI_PS_INPUT_CNTL_n */
 	memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2f77cc741bf..cfe28c2e8f0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -797,20 +797,23 @@ struct si_context {
 	unsigned			sample_locs_num_samples;
 	uint16_t			sample_mask;
 	unsigned			last_cb_target_mask;
 	struct si_blend_color		blend_color;
 	struct si_clip_state		clip_state;
 	struct si_shader_data		shader_pointers;
 	struct si_stencil_ref		stencil_ref;
 	struct si_scissors		scissors;
 	struct si_streamout		streamout;
 	struct si_viewports		viewports;
+	unsigned			num_window_rectangles;
+	bool				window_rectangles_include;
+	struct pipe_scissor_state	window_rectangles[4];
 
 	/* Precomputed states. */
 	struct si_pm4_state		*init_config;
 	struct si_pm4_state		*init_config_gs_rings;
 	bool				init_config_has_vgt_flush;
 	struct si_pm4_state		*vgt_shader_config[4];
 
 	/* shaders */
 	struct si_shader_ctx_state	ps_shader;
 	struct si_shader_ctx_state	gs_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index fc1ec83931f..780d9010abc 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4865,21 +4865,20 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
 		si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
 		si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
 			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
 		si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
 		si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
 			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
 	}
 
 	if (!has_clear_state) {
-		si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
 		si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
 			       S_028230_ER_TRI(0xA) |
 			       S_028230_ER_POINT(0xA) |
 			       S_028230_ER_RECT(0xA) |
 			       /* Required by DX10_DIAMOND_TEST_ENA: */
 			       S_028230_ER_LINE_LR(0x1A) |
 			       S_028230_ER_LINE_RL(0x26) |
 			       S_028230_ER_LINE_TB(0xA) |
 			       S_028230_ER_LINE_BT(0xA));
 		/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 1edf5d646b6..1a8f3debf3a 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -206,20 +206,21 @@ union si_state_atoms {
 		struct si_atom blend_color;
 		struct si_atom clip_regs;
 		struct si_atom clip_state;
 		struct si_atom shader_pointers;
 		struct si_atom guardband;
 		struct si_atom scissors;
 		struct si_atom viewports;
 		struct si_atom stencil_ref;
 		struct si_atom spi_map;
 		struct si_atom scratch_state;
+		struct si_atom window_rectangles;
 	} s;
 	struct si_atom array[0];
 };
 
 #define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / \
 			         sizeof(struct si_atom)))
 #define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct si_atom*))
 
 static inline unsigned si_atoms_that_roll_context(void)
 {
@@ -274,20 +275,22 @@ enum si_tracked_reg {
 	SI_TRACKED_PA_CL_CLIP_CNTL,
 
 	SI_TRACKED_PA_SC_BINNER_CNTL_0,
 	SI_TRACKED_DB_DFSM_CONTROL,
 
 	SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */
 	SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
 	SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ,
 	SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ,
 
+	SI_TRACKED_PA_SC_CLIPRECT_RULE,
+
 	SI_NUM_TRACKED_REGS,
 };
 
 struct si_tracked_regs {
 	uint32_t		reg_saved;
 	uint32_t		reg_value[SI_NUM_TRACKED_REGS];
 	uint32_t		spi_ps_input_cntl[32];
 };
 
 /* Private read-write buffer slots. */
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index 4183be0c880..8dc68b126eb 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -428,19 +428,99 @@ void si_update_vs_viewport_state(struct si_context *ctx)
 		return;
 
 	if (ctx->scissors.dirty_mask)
 	    si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 
 	if (ctx->viewports.dirty_mask ||
 	    ctx->viewports.depth_range_dirty_mask)
 	    si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 }
 
+static void si_emit_window_rectangles(struct si_context *sctx)
+{
+	/* Emits CLIPRECT_RULE and the corners of the active clipping rectangles.
+	 * There are four cliprects; their corner coordinates are inclusive. Every
+	 * pixel is assigned a number from 0 to 15 by setting bits 0-3 depending on
+	 * whether the pixel is inside cliprects 0-3, respectively. For example, a
+	 * pixel inside cliprects 0 and 1, but outside 2 and 3, is assigned the
+	 * number 3 (binary 0011).
+	 * If CLIPRECT_RULE & (1 << number), the pixel is rasterized.
+	 */
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+	static const unsigned outside[4] = {
+		/* outside rectangle 0 */
+		V_02820C_OUT |
+		V_02820C_IN_1 |
+		V_02820C_IN_2 |
+		V_02820C_IN_21 |
+		V_02820C_IN_3 |
+		V_02820C_IN_31 |
+		V_02820C_IN_32 |
+		V_02820C_IN_321,
+		/* outside rectangles 0, 1 */
+		V_02820C_OUT |
+		V_02820C_IN_2 |
+		V_02820C_IN_3 |
+		V_02820C_IN_32,
+		/* outside rectangles 0, 1, 2 */
+		V_02820C_OUT |
+		V_02820C_IN_3,
+		/* outside rectangles 0, 1, 2, 3 */
+		V_02820C_OUT,
+	};
+	const unsigned disabled = 0xffff; /* all inside and outside cases */
+	unsigned num_rectangles = sctx->num_window_rectangles;
+	struct pipe_scissor_state *rects = sctx->window_rectangles;
+	unsigned rule;
+
+	assert(num_rectangles <= 4);
+
+	if (num_rectangles == 0)
+		rule = disabled;
+	else if (sctx->window_rectangles_include)
+		rule = ~outside[num_rectangles - 1]; /* inside >= 1 rect; NOTE(review): ~ also sets the bits above the 16-bit CLIP_RULE field */
+	else
+		rule = outside[num_rectangles - 1]; /* outside all of the first num_rectangles rects */
+
+	radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE,
+				   SI_TRACKED_PA_SC_CLIPRECT_RULE, rule);
+	if (num_rectangles == 0)
+		return; /* rule passes every pixel, so no corners are emitted */
+
+	radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL,
+				   num_rectangles * 2);
+	for (unsigned i = 0; i < num_rectangles; i++) { /* two dwords (TL, then BR) per rectangle */
+		radeon_emit(cs, S_028210_TL_X(rects[i].minx) |
+				S_028210_TL_Y(rects[i].miny));
+		radeon_emit(cs, S_028214_BR_X(rects[i].maxx) |
+				S_028214_BR_Y(rects[i].maxy));
+	}
+}
+
+static void si_set_window_rectangles(struct pipe_context *ctx,
+				     boolean include,
+				     unsigned num_rectangles,
+				     const struct pipe_scissor_state *rects)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+
+	sctx->num_window_rectangles = num_rectangles; /* saved state is read back by si_emit_window_rectangles */
+	sctx->window_rectangles_include = include;
+	if (num_rectangles) { /* rects is only read when the count is non-zero */
+		memcpy(sctx->window_rectangles, rects,
+		       sizeof(*rects) * num_rectangles); /* NOTE(review): count is not checked against the 4-entry array - confirm callers clamp */
+	}
+
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.window_rectangles);
+}
+
 void si_init_viewport_functions(struct si_context *ctx)
 {
 	ctx->atoms.s.guardband.emit = si_emit_guardband;
 	ctx->atoms.s.scissors.emit = si_emit_scissors;
 	ctx->atoms.s.viewports.emit = si_emit_viewport_states;
+	ctx->atoms.s.window_rectangles.emit = si_emit_window_rectangles;
 
 	ctx->b.set_scissor_states = si_set_scissor_states;
 	ctx->b.set_viewport_states = si_set_viewport_states;
+	ctx->b.set_window_rectangles = si_set_window_rectangles;
 }
-- 
2.17.1



More information about the mesa-dev mailing list