[Mesa-dev] [PATCH 15/15] radeonsi: use higher subpixel precision (QUANT_MODE) for smaller viewports

Marek Olšák maraeo at gmail.com
Tue Oct 2 22:35:47 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_get.c         |  4 +-
 src/gallium/drivers/radeonsi/si_pipe.h        |  8 +++
 .../drivers/radeonsi/si_state_viewport.c      | 50 ++++++++++++++++---
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index a87cb3cbc8a..ac302b8a946 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -328,21 +328,23 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	}
 }
 
 static float si_get_paramf(struct pipe_screen* pscreen, enum pipe_capf param)
 {
 	switch (param) {
 	case PIPE_CAPF_MAX_LINE_WIDTH:
 	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
 	case PIPE_CAPF_MAX_POINT_WIDTH:
 	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-		return 8192.0f;
+		/* This depends on the quant mode, though the precise interactions
+		 * are unknown. */
+		return 2048;
 	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
 		return 16.0f;
 	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
 		return 16.0f;
 	case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
 	case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
 	case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
 		return 0.0f;
 	}
 	return 0.0f;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 93082e262d6..7e15412ef87 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -596,25 +596,33 @@ struct si_framebuffer {
 	ubyte				uncompressed_cb_mask;
 	ubyte				color_is_int8;
 	ubyte				color_is_int10;
 	ubyte				dirty_cbufs;
 	bool				dirty_zsbuf;
 	bool				any_dst_linear;
 	bool				CB_has_shader_readable_metadata;
 	bool				DB_has_shader_readable_metadata;
 };
 
+enum si_quant_mode {
+	/* Supported QUANT_MODE settings. Keep this order: the value is added to V_028BE4_X_16_8_FIXED_POINT_1_256TH when PA_SU_VTX_CNTL is emitted. */
+	SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,
+	SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,
+	SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH,
+};
+
 struct si_signed_scissor {
 	int minx;
 	int miny;
 	int maxx;
 	int maxy;
+	enum si_quant_mode quant_mode; /* subpixel precision chosen from the viewport extent */
 };
 
 struct si_scissors {
 	unsigned			dirty_mask;
 	struct pipe_scissor_state	states[SI_MAX_VIEWPORTS];
 };
 
 struct si_viewports {
 	unsigned			dirty_mask;
 	unsigned			depth_range_dirty_mask;
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index c69a56dffae..819c773ba8e 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -100,20 +100,21 @@ static void si_clip_scissor(struct pipe_scissor_state *out,
 	out->maxy = MIN2(out->maxy, clip->maxy);
 }
 
 static void si_scissor_make_union(struct si_signed_scissor *out,
 				  struct si_signed_scissor *in)
 {
 	out->minx = MIN2(out->minx, in->minx);
 	out->miny = MIN2(out->miny, in->miny);
 	out->maxx = MAX2(out->maxx, in->maxx);
 	out->maxy = MAX2(out->maxy, in->maxy);
+	out->quant_mode = MIN2(out->quant_mode, in->quant_mode); /* lower value = larger range, so the union keeps the mode with the largest range */
 }
 
 static void si_emit_one_scissor(struct si_context *ctx,
 				struct radeon_cmdbuf *cs,
 				struct si_signed_scissor *vp_scissor,
 				struct pipe_scissor_state *scissor)
 {
 	struct pipe_scissor_state final;
 
 	if (ctx->vs_disables_clipping_viewport) {
@@ -138,43 +139,47 @@ static void si_emit_one_scissor(struct si_context *ctx,
 		return;
 	}
 
 	radeon_emit(cs, S_028250_TL_X(final.minx) |
 			S_028250_TL_Y(final.miny) |
 			S_028250_WINDOW_OFFSET_DISABLE(1));
 	radeon_emit(cs, S_028254_BR_X(final.maxx) |
 			S_028254_BR_Y(final.maxy));
 }
 
-/* the range is [-MAX, MAX] */
-#define SI_MAX_VIEWPORT_RANGE 32768
-
 static void si_emit_guardband(struct si_context *ctx)
 {
 	const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
 	struct si_signed_scissor vp_as_scissor;
 	struct pipe_viewport_state vp;
 	float left, top, right, bottom, max_range, guardband_x, guardband_y;
 	float discard_x, discard_y;
 
 	if (ctx->vs_writes_viewport_index) {
 		/* Shaders can draw to any viewport. Make a union of all
 		 * viewports. */
 		vp_as_scissor = ctx->viewports.as_scissor[0];
 		for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
 			si_scissor_make_union(&vp_as_scissor,
 					      &ctx->viewports.as_scissor[i]);
 		}
 	} else {
 		vp_as_scissor = ctx->viewports.as_scissor[0];
 	}
 
+	/* Blits don't set the viewport state. The vertex shader determines
+	 * the viewport size by scaling the coordinates, so we don't know
+	 * how large the viewport is. Assume the worst case.
+	 */
+	if (ctx->vs_disables_clipping_viewport)
+		vp_as_scissor.quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
+
 	/* Determine the optimal hardware screen offset to center the viewport
 	 * within the viewport range in order to maximize the guardband size.
 	 */
 	int hw_screen_offset_x = (vp_as_scissor.maxx - vp_as_scissor.minx) / 2;
 	int hw_screen_offset_y = (vp_as_scissor.maxy - vp_as_scissor.miny) / 2;
 
 	const unsigned hw_screen_offset_max = 8176;
 	/* SI-CI need to align the offset to an ubertile consisting of all SEs. */
 	const unsigned hw_screen_offset_alignment =
 		ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
@@ -204,23 +209,25 @@ static void si_emit_guardband(struct si_context *ctx)
 	if (vp_as_scissor.miny == vp_as_scissor.maxy)
 		vp.scale[1] = 0.5;
 
 	/* Find the biggest guard band that is inside the supported viewport
 	 * range. The guard band is specified as a horizontal and vertical
 	 * distance from (0,0) in clip space.
 	 *
 	 * This is done by applying the inverse viewport transformation
 	 * on the viewport limits to get those limits in clip space.
 	 *
-	 * Use a limit one pixel smaller to allow for some precision error.
+	 * The viewport range is [-max_viewport_size/2, max_viewport_size/2].
 	 */
-	max_range = SI_MAX_VIEWPORT_RANGE - 1;
+	static unsigned max_viewport_size[] = {65535, 16383, 4095};
+	assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
+	max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
 	left   = (-max_range - vp.translate[0]) / vp.scale[0];
 	right  = ( max_range - vp.translate[0]) / vp.scale[0];
 	top    = (-max_range - vp.translate[1]) / vp.scale[1];
 	bottom = ( max_range - vp.translate[1]) / vp.scale[1];
 
 	assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
 
 	guardband_x = MIN2(-left, right);
 	guardband_y = MIN2(-top, bottom);
 
@@ -255,21 +262,22 @@ static void si_emit_guardband(struct si_context *ctx)
 				    SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
 				    fui(guardband_y), fui(discard_y),
 				    fui(guardband_x), fui(discard_x));
 	radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
 				   SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
 				   S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
 				   S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
 	radeon_opt_set_context_reg(ctx, R_028BE4_PA_SU_VTX_CNTL,
 				   SI_TRACKED_PA_SU_VTX_CNTL,
 				   S_028BE4_PIX_CENTER(rs->half_pixel_center) |
-				   S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
+				   S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
+						       vp_as_scissor.quant_mode));
 }
 
 static void si_emit_scissors(struct si_context *ctx)
 {
 	struct radeon_cmdbuf *cs = ctx->gfx_cs;
 	struct pipe_scissor_state *states = ctx->scissors.states;
 	unsigned mask = ctx->scissors.dirty_mask;
 	bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
 
 	/* The simple case: Only 1 viewport is active. */
@@ -304,24 +312,47 @@ static void si_set_viewport_states(struct pipe_context *pctx,
 				   unsigned start_slot,
 				   unsigned num_viewports,
 				   const struct pipe_viewport_state *state)
 {
 	struct si_context *ctx = (struct si_context *)pctx;
 	unsigned mask;
 	int i;
 
 	for (i = 0; i < num_viewports; i++) {
 		unsigned index = start_slot + i;
+		struct si_signed_scissor *scissor = &ctx->viewports.as_scissor[index];
 
 		ctx->viewports.states[index] = state[i];
-		si_get_scissor_from_viewport(ctx, &state[i],
-					     &ctx->viewports.as_scissor[index]);
+
+		si_get_scissor_from_viewport(ctx, &state[i], scissor);
+
+		unsigned w = scissor->maxx - scissor->minx;
+		unsigned h = scissor->maxy - scissor->miny;
+		unsigned max_extent = MAX2(w, h);
+
+		/* Determine the best quantization mode (subpixel precision),
+		 * but also leave enough space for the guardband.
+		 *
+		 * Note that primitive binning requires QUANT_MODE == 16_8 on Vega10
+		 * and Raven1. What we do depends on the chip:
+		 * - Vega10: Never use primitive binning.
+		 * - Raven1: Always use QUANT_MODE == 16_8.
+		 */
+		if (ctx->family == CHIP_RAVEN)
+			max_extent = 16384; /* Use QUANT_MODE == 16_8. */
+
+		if (max_extent <= 1024) /* 4K scanline area for guardband */
+			scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
+		else if (max_extent <= 4096) /* 16K scanline area for guardband */
+			scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
+		else /* 64K scanline area for guardband */
+			scissor->quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
 	}
 
 	mask = ((1 << num_viewports) - 1) << start_slot;
 	ctx->viewports.dirty_mask |= mask;
 	ctx->viewports.depth_range_dirty_mask |= mask;
 	ctx->scissors.dirty_mask |= mask;
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 }
@@ -558,11 +589,14 @@ static void si_set_window_rectangles(struct pipe_context *ctx,
 void si_init_viewport_functions(struct si_context *ctx)
 {
 	ctx->atoms.s.guardband.emit = si_emit_guardband;
 	ctx->atoms.s.scissors.emit = si_emit_scissors;
 	ctx->atoms.s.viewports.emit = si_emit_viewport_states;
 	ctx->atoms.s.window_rectangles.emit = si_emit_window_rectangles;
 
 	ctx->b.set_scissor_states = si_set_scissor_states;
 	ctx->b.set_viewport_states = si_set_viewport_states;
 	ctx->b.set_window_rectangles = si_set_window_rectangles;
+
+	for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) /* default mode for every slot until si_set_viewport_states picks one */
+		ctx->viewports.as_scissor[i].quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
 }
-- 
2.17.1



More information about the mesa-dev mailing list