[Mesa-dev] [PATCH 07/14] radeonsi: move the guardband registers into a separate state atom
Marek Olšák
maraeo at gmail.com
Fri Jun 1 05:21:15 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
They have a different frequency of updates and don't change when scissors
change.
I think this even fixes something in si_update_vs_viewport_state.
---
src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 +
src/gallium/drivers/radeonsi/si_state.c | 9 +++--
src/gallium/drivers/radeonsi/si_state.h | 2 ++
src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++--
.../drivers/radeonsi/si_state_viewport.c | 36 ++++++++++++-------
5 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 84536b7c6f6..c7ec83789b1 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -277,20 +277,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
si_all_descriptors_begin_new_cs(ctx);
si_all_resident_buffers_begin_new_cs(ctx);
ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
if (ctx->scratch_buffer) {
si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b);
}
if (ctx->streamout.suspended) {
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3a7e928df53..3d19af28507 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -995,27 +995,30 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
}
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);
if (!old_rs ||
- (old_rs->scissor_enable != rs->scissor_enable ||
- old_rs->line_width != rs->line_width ||
- old_rs->max_point_size != rs->max_point_size)) {
+ old_rs->scissor_enable != rs->scissor_enable) {
sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
}
+ if (!old_rs ||
+ old_rs->line_width != rs->line_width ||
+ old_rs->max_point_size != rs->max_point_size)
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
+
if (!old_rs ||
old_rs->clip_halfz != rs->clip_halfz) {
sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);
}
if (!old_rs ||
old_rs->clip_plane_enable != rs->clip_plane_enable ||
old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 9da58ac9710..3cfcf75a22f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -200,20 +200,21 @@ union si_state_atoms {
struct si_atom msaa_sample_locs;
struct si_atom db_render_state;
struct si_atom dpbb_state;
struct si_atom msaa_config;
struct si_atom sample_mask;
struct si_atom cb_render_state;
struct si_atom blend_color;
struct si_atom clip_regs;
struct si_atom clip_state;
struct si_atom shader_pointers;
+ struct si_atom guardband;
struct si_atom scissors;
struct si_atom viewports;
struct si_atom stencil_ref;
struct si_atom spi_map;
struct si_atom scratch_state;
} s;
struct si_atom array[0];
};
#define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / \
@@ -227,20 +228,21 @@ static inline unsigned si_atoms_that_roll_context(void)
SI_ATOM_BIT(framebuffer) |
SI_ATOM_BIT(msaa_sample_locs) |
SI_ATOM_BIT(db_render_state) |
SI_ATOM_BIT(dpbb_state) |
SI_ATOM_BIT(msaa_config) |
SI_ATOM_BIT(sample_mask) |
SI_ATOM_BIT(cb_render_state) |
SI_ATOM_BIT(blend_color) |
SI_ATOM_BIT(clip_regs) |
SI_ATOM_BIT(clip_state) |
+ SI_ATOM_BIT(guardband) |
SI_ATOM_BIT(scissors) |
SI_ATOM_BIT(viewports) |
SI_ATOM_BIT(stencil_ref) |
SI_ATOM_BIT(spi_map) |
SI_ATOM_BIT(scratch_state));
}
struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS];
};
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 5588c9a2c53..e33e235620a 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1270,24 +1270,22 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
if (sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_POINT_MODE])
rast_prim = PIPE_PRIM_POINTS;
else
rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
} else
rast_prim = info->mode;
if (rast_prim != sctx->current_rast_prim) {
bool old_is_poly = sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES;
bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES;
- if (old_is_poly != new_is_poly) {
- sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
- si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
- }
+ if (old_is_poly != new_is_poly)
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
sctx->current_rast_prim = rast_prim;
sctx->do_update_shaders = true;
}
if (sctx->tes_shader.cso &&
sctx->screen->has_ls_vgpr_init_bug) {
/* Determine whether the LS VGPR fix should be applied.
*
* It is only required when num input CPs > num output CPs,
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index bffb1f91827..97b1b89b48b 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -129,28 +129,42 @@ static void si_emit_one_scissor(struct si_context *ctx,
radeon_emit(cs, S_028250_TL_X(final.minx) |
S_028250_TL_Y(final.miny) |
S_028250_WINDOW_OFFSET_DISABLE(1));
radeon_emit(cs, S_028254_BR_X(final.maxx) |
S_028254_BR_Y(final.maxy));
}
/* the range is [-MAX, MAX] */
#define SI_MAX_VIEWPORT_RANGE 32768
-static void si_emit_guardband(struct si_context *ctx,
- struct si_signed_scissor *vp_as_scissor)
+static void si_emit_guardband(struct si_context *ctx)
{
+ const struct si_signed_scissor *vp_as_scissor;
+ struct si_signed_scissor max_vp_scissor;
struct radeon_winsys_cs *cs = ctx->gfx_cs;
struct pipe_viewport_state vp;
float left, top, right, bottom, max_range, guardband_x, guardband_y;
float discard_x, discard_y;
+ if (ctx->vs_writes_viewport_index) {
+ /* Shaders can draw to any viewport. Make a union of all
+ * viewports. */
+ max_vp_scissor = ctx->viewports.as_scissor[0];
+ for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
+ si_scissor_make_union(&max_vp_scissor,
+ &ctx->viewports.as_scissor[i]);
+ }
+ vp_as_scissor = &max_vp_scissor;
+ } else {
+ vp_as_scissor = &ctx->viewports.as_scissor[0];
+ }
+
/* Reconstruct the viewport transformation from the scissor. */
vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
/* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
if (vp_as_scissor->minx == vp_as_scissor->maxx)
vp.scale[0] = 0.5;
if (vp_as_scissor->miny == vp_as_scissor->maxy)
@@ -209,59 +223,49 @@ static void si_emit_guardband(struct si_context *ctx,
radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
}
static void si_emit_scissors(struct si_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->gfx_cs;
struct pipe_scissor_state *states = ctx->scissors.states;
unsigned mask = ctx->scissors.dirty_mask;
bool scissor_enabled = false;
- struct si_signed_scissor max_vp_scissor;
- int i;
if (ctx->queued.named.rasterizer)
scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
/* The simple case: Only 1 viewport is active. */
if (!ctx->vs_writes_viewport_index) {
struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
if (!(mask & 1))
return;
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
- si_emit_guardband(ctx, vp);
ctx->scissors.dirty_mask &= ~1; /* clear one bit */
return;
}
- /* Shaders can draw to any viewport. Make a union of all viewports. */
- max_vp_scissor = ctx->viewports.as_scissor[0];
- for (i = 1; i < SI_MAX_VIEWPORTS; i++)
- si_scissor_make_union(&max_vp_scissor,
- &ctx->viewports.as_scissor[i]);
-
while (mask) {
int start, count, i;
u_bit_scan_consecutive_range(&mask, &start, &count);
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
start * 4 * 2, count * 2);
for (i = start; i < start+count; i++) {
si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
scissor_enabled ? &states[i] : NULL);
}
}
- si_emit_guardband(ctx, &max_vp_scissor);
ctx->scissors.dirty_mask = 0;
}
static void si_set_viewport_states(struct pipe_context *pctx,
unsigned start_slot,
unsigned num_viewports,
const struct pipe_viewport_state *state)
{
struct si_context *ctx = (struct si_context *)pctx;
unsigned mask;
@@ -273,20 +277,21 @@ static void si_set_viewport_states(struct pipe_context *pctx,
ctx->viewports.states[index] = state[i];
si_get_scissor_from_viewport(ctx, &state[i],
&ctx->viewports.as_scissor[index]);
}
mask = ((1 << num_viewports) - 1) << start_slot;
ctx->viewports.dirty_mask |= mask;
ctx->viewports.depth_range_dirty_mask |= mask;
ctx->scissors.dirty_mask |= mask;
si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
}
static void si_emit_one_viewport(struct si_context *ctx,
struct pipe_viewport_state *state)
{
struct radeon_winsys_cs *cs = ctx->gfx_cs;
radeon_emit(cs, fui(state->scale[0]));
radeon_emit(cs, fui(state->translate[0]));
@@ -412,30 +417,37 @@ void si_update_vs_viewport_state(struct si_context *ctx)
if (ctx->vs_disables_clipping_viewport != vs_window_space) {
ctx->vs_disables_clipping_viewport = vs_window_space;
ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
}
/* Viewport index handling. */
+ if (ctx->vs_writes_viewport_index == info->writes_viewport_index)
+ return;
+
+ /* This changes how the guardband is computed. */
ctx->vs_writes_viewport_index = info->writes_viewport_index;
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
+
if (!ctx->vs_writes_viewport_index)
return;
if (ctx->scissors.dirty_mask)
si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
if (ctx->viewports.dirty_mask ||
ctx->viewports.depth_range_dirty_mask)
si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
}
void si_init_viewport_functions(struct si_context *ctx)
{
+ ctx->atoms.s.guardband.emit = si_emit_guardband;
ctx->atoms.s.scissors.emit = si_emit_scissors;
ctx->atoms.s.viewports.emit = si_emit_viewport_states;
ctx->b.set_scissor_states = si_set_scissor_states;
ctx->b.set_viewport_states = si_set_viewport_states;
}
--
2.17.0
More information about the mesa-dev
mailing list