[Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)
Marek Olšák
maraeo at gmail.com
Thu Jun 25 07:26:12 PDT 2015
Hi Dave,
The change in si_shader_io_get_unique_index can be dropped. The
function is only used for shaders before GS.
This looks good, but I've had a different plan for this feature:
I'd like the states to be converted into 2 atoms:
1 r600_atom for all 16 viewports
1 r600_atom for all 16 scissors
Each atom should have a bitmask saying which "slots" are dirty (the same
idea as resource slots).
The "emit" functions should only emit dirty viewports/scissors.
Also, the "emit" functions shouldn't emit viewports/scissors with a
non-zero index if the viewport index isn't written by the hardware VS
stage (si_get_vs_info(sctx)->...). This should keep the same level of
efficiency as before. When a shader that writes the viewport index
is bound *and* there are any dirty viewports or scissors, that's the
right time to mark the atoms as dirty again, so that the dirty
viewports/scissors with a non-zero index are finally emitted.
Marek
On Thu, Jun 25, 2015 at 6:38 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This isn't pretty, and I'd suggest that the pm4 interface builder
> could be tweaked to do this more efficiently, but I'd need
> guidance on how that would look.
>
> This seems to pass the few piglit tests I threw at it.
>
> v2: handle passing layer/viewport index to fragment shader.
> fix crash in blit changes,
> add support to io_get_unique_index for layer/viewport index
> update docs.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> docs/GL3.txt | 4 +-
> docs/relnotes/10.7.0.html | 3 ++
> src/gallium/drivers/radeonsi/si_blit.c | 8 +--
> src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
> src/gallium/drivers/radeonsi/si_shader.c | 26 +++++++---
> src/gallium/drivers/radeonsi/si_state.c | 66 +++++++++++++++----------
> src/gallium/drivers/radeonsi/si_state.h | 4 +-
> src/gallium/drivers/radeonsi/si_state_shaders.c | 2 -
> 8 files changed, 73 insertions(+), 42 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 220bcc8..df913bd 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10:
> GL_ARB_separate_shader_objects DONE (all drivers)
> GL_ARB_shader_precision started (Micah)
> GL_ARB_vertex_attrib_64bit DONE (nvc0, softpipe)
> - GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, llvmpipe)
> + GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
>
>
> GL 4.2, GLSL 4.20:
> @@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
> GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware)
> GL_KHR_debug DONE (all drivers)
> GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
> - GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, llvmpipe)
> + GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
> GL_ARB_framebuffer_no_attachments DONE (i965)
> GL_ARB_internalformat_query2 not started
> GL_ARB_invalidate_subdata DONE (all drivers)
> diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
> index e089889..fcc5081 100644
> --- a/docs/relnotes/10.7.0.html
> +++ b/docs/relnotes/10.7.0.html
> @@ -44,8 +44,11 @@ Note: some of the new features are only available with certain drivers.
> </p>
>
> <ul>
> +<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
> <li>GL_ARB_framebuffer_no_attachments on i965</li>
> <li>GL_ARB_shader_stencil_export on llvmpipe</li>
> +<li>GL_ARB_viewport_array on radeonsi</li>
> +<li>GL_ARB_fragment_layer_viewport on radeonsi</li>
> </ul>
>
> <h2>Bug fixes</h2>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 1f2c408..6c7b383 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
> util_blitter_save_sample_mask(sctx->blitter,
> sctx->queued.named.sample_mask->sample_mask);
> }
> - if (sctx->queued.named.viewport) {
> - util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
> + if (sctx->queued.named.viewport[0]) {
> + util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
> }
> - if (sctx->queued.named.scissor) {
> - util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
> + if (sctx->queued.named.scissor[0]) {
> + util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
> }
> util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
> util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 53ae71a..480a301 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> return 8;
>
> case PIPE_CAP_MAX_VIEWPORTS:
> - return 1;
> + return 16;
>
> /* Timer queries, present when the clock frequency is non zero. */
> case PIPE_CAP_QUERY_TIMESTAMP:
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 47e5f96..87608a1 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -125,12 +125,16 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
> return 0;
> case TGSI_SEMANTIC_PSIZE:
> return 1;
> + case TGSI_SEMANTIC_LAYER:
> + return 2;
> + case TGSI_SEMANTIC_VIEWPORT_INDEX:
> + return 3;
> case TGSI_SEMANTIC_CLIPDIST:
> assert(index <= 1);
> - return 2 + index;
> - case TGSI_SEMANTIC_GENERIC:
> - assert(index <= 63-4);
> return 4 + index;
> + case TGSI_SEMANTIC_GENERIC:
> + assert(index <= 63-6);
> + return 6 + index;
>
> default:
> assert(0);
> @@ -1128,7 +1132,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
> &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
> LLVMValueRef args[9];
> LLVMValueRef pos_args[4][9] = { { 0 } };
> - LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
> + LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
> unsigned semantic_name, semantic_index;
> unsigned target;
> unsigned param_count = 0;
> @@ -1154,7 +1158,12 @@ handle_semantic:
> continue;
> case TGSI_SEMANTIC_LAYER:
> layer_value = outputs[i].values[0];
> - continue;
> + semantic_name = TGSI_SEMANTIC_GENERIC;
> + goto handle_semantic;
> + case TGSI_SEMANTIC_VIEWPORT_INDEX:
> + viewport_index_value = outputs[i].values[0];
> + semantic_name = TGSI_SEMANTIC_GENERIC;
> + goto handle_semantic;
> case TGSI_SEMANTIC_POSITION:
> target = V_008DFC_SQ_EXP_POS;
> break;
> @@ -1220,11 +1229,13 @@ handle_semantic:
> /* Write the misc vector (point size, edgeflag, layer, viewport). */
> if (shader->selector->info.writes_psize ||
> shader->selector->info.writes_edgeflag ||
> + shader->selector->info.writes_viewport_index ||
> shader->selector->info.writes_layer) {
> pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
> shader->selector->info.writes_psize |
> (shader->selector->info.writes_edgeflag << 1) |
> - (shader->selector->info.writes_layer << 2));
> + (shader->selector->info.writes_layer << 2) |
> + (shader->selector->info.writes_viewport_index << 3));
> pos_args[1][1] = uint->zero; /* EXEC mask */
> pos_args[1][2] = uint->zero; /* last export? */
> pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
> @@ -1255,6 +1266,9 @@ handle_semantic:
>
> if (shader->selector->info.writes_layer)
> pos_args[1][7] = layer_value;
> +
> + if (shader->selector->info.writes_viewport_index)
> + pos_args[1][8] = viewport_index_value;
> }
>
> for (i = 0; i < 4; i++)
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 6c18836..752467b 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -489,11 +489,13 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
> S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
> S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
> S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
> + S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
> S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
> S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
> S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
> info->writes_edgeflag ||
> - info->writes_layer) |
> + info->writes_layer ||
> + info->writes_viewport_index) |
> (sctx->queued.named.rasterizer->clip_plane_enable &
> clipdist_mask));
> r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
> @@ -509,20 +511,26 @@ static void si_set_scissor_states(struct pipe_context *ctx,
> const struct pipe_scissor_state *state)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> - struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
> - struct si_pm4_state *pm4 = &scissor->pm4;
> -
> - if (scissor == NULL)
> - return;
> + struct si_state_scissor *scissor;
> + struct si_pm4_state *pm4;
> + int i;
>
> - scissor->scissor = *state;
> - si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
> - S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
> - S_028250_WINDOW_OFFSET_DISABLE(1));
> - si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
> - S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
> + for (i = start_slot; i < start_slot + num_scissors; i++) {
> + int idx = i - start_slot;
> + int offset = i * 4 * 2;
>
> - si_pm4_set_state(sctx, scissor, scissor);
> + scissor = CALLOC_STRUCT(si_state_scissor);
> + if (scissor == NULL)
> + return;
> + pm4 = &scissor->pm4;
> + scissor->scissor = state[idx];
> + si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
> + S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
> + S_028250_WINDOW_OFFSET_DISABLE(1));
> + si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
> + S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
> + si_pm4_set_state(sctx, scissor[i], scissor);
> + }
> }
>
> static void si_set_viewport_states(struct pipe_context *ctx,
> @@ -531,21 +539,29 @@ static void si_set_viewport_states(struct pipe_context *ctx,
> const struct pipe_viewport_state *state)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> - struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
> - struct si_pm4_state *pm4 = &viewport->pm4;
> + struct si_state_viewport *viewport;
> + struct si_pm4_state *pm4;
> + int i;
>
> - if (viewport == NULL)
> - return;
> + for (i = start_slot; i < start_slot + num_viewports; i++) {
> + int idx = i - start_slot;
> + int offset = i * 4 * 6;
>
> - viewport->viewport = *state;
> - si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
> - si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
> - si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
> - si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
> - si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
> - si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
> + viewport = CALLOC_STRUCT(si_state_viewport);
> + if (!viewport)
> + return;
> + pm4 = &viewport->pm4;
> +
> + viewport->viewport = state[idx];
> + si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, fui(state[idx].scale[0]));
> + si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0 + offset, fui(state[idx].translate[0]));
> + si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0 + offset, fui(state[idx].scale[1]));
> + si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0 + offset, fui(state[idx].translate[1]));
> + si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0 + offset, fui(state[idx].scale[2]));
> + si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0 + offset, fui(state[idx].translate[2]));
>
> - si_pm4_set_state(sctx, viewport, viewport);
> + si_pm4_set_state(sctx, viewport[i], viewport);
> + }
> }
>
> /*
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 5e68b16..d1f2dff 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -92,8 +92,8 @@ union si_state {
> struct si_pm4_state *blend_color;
> struct si_pm4_state *clip;
> struct si_state_sample_mask *sample_mask;
> - struct si_state_scissor *scissor;
> - struct si_state_viewport *viewport;
> + struct si_state_scissor *scissor[16];
> + struct si_state_viewport *viewport[16];
> struct si_state_rasterizer *rasterizer;
> struct si_state_dsa *dsa;
> struct si_pm4_state *fb_rs;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 208c852..48128fa 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -187,8 +187,6 @@ static void si_shader_vs(struct si_shader *shader)
> case TGSI_SEMANTIC_POSITION:
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_EDGEFLAG:
> - case TGSI_SEMANTIC_VIEWPORT_INDEX:
> - case TGSI_SEMANTIC_LAYER:
> break;
> default:
> nparams++;
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list