[Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)

Marek Olšák maraeo at gmail.com
Thu Jun 25 07:26:12 PDT 2015


Hi Dave,

The change in si_shader_io_get_unique_index can be dropped. The
function is only used for shaders before GS.

This looks good, but I've had a different plan for this feature:

I'd like the states to be converted into 2 atoms:

1 r600_atom for all 16 viewports
1 r600_atom for all 16 scissors

Each atom should have a bitmask saying which "slots" are dirty (the same
idea as resource slots).

The "emit" functions should only emit dirty viewports/scissors.

Also, the "emit" functions shouldn't emit viewports/scissors with a
non-zero index if the viewport index isn't written by the hardware VS
stage (si_get_vs_info(sctx)->...). This should keep the same level of
efficiency as before. When a shader that writes the viewport index
is bound *and* there are any dirty viewports or scissors, that's the
right time to mark the atoms as dirty again, so that the dirty
viewports/scissors with a non-zero index are finally emitted.

Marek

On Thu, Jun 25, 2015 at 6:38 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This isn't pretty and I'd suggest the pm4 interface builder
> could be tweaked to do this more efficiently, but I'd need
> guidance on how that would look.
>
> This seems to pass the few piglit tests I threw at it.
>
> v2: handle passing layer/viewport index to fragment shader.
> fix crash in blit changes,
> add support to io_get_unique_index for layer/viewport index
> update docs.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  docs/GL3.txt                                    |  4 +-
>  docs/relnotes/10.7.0.html                       |  3 ++
>  src/gallium/drivers/radeonsi/si_blit.c          |  8 +--
>  src/gallium/drivers/radeonsi/si_pipe.c          |  2 +-
>  src/gallium/drivers/radeonsi/si_shader.c        | 26 +++++++---
>  src/gallium/drivers/radeonsi/si_state.c         | 66 +++++++++++++++----------
>  src/gallium/drivers/radeonsi/si_state.h         |  4 +-
>  src/gallium/drivers/radeonsi/si_state_shaders.c |  2 -
>  8 files changed, 73 insertions(+), 42 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 220bcc8..df913bd 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10:
>    GL_ARB_separate_shader_objects                       DONE (all drivers)
>    GL_ARB_shader_precision                              started (Micah)
>    GL_ARB_vertex_attrib_64bit                           DONE (nvc0, softpipe)
> -  GL_ARB_viewport_array                                DONE (i965, nv50, nvc0, r600, llvmpipe)
> +  GL_ARB_viewport_array                                DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
>
>
>  GL 4.2, GLSL 4.20:
> @@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
>    GL_ARB_copy_image                                    DONE (i965) (gallium - in progress, VMware)
>    GL_KHR_debug                                         DONE (all drivers)
>    GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
> -  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, llvmpipe)
> +  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
>    GL_ARB_framebuffer_no_attachments                    DONE (i965)
>    GL_ARB_internalformat_query2                         not started
>    GL_ARB_invalidate_subdata                            DONE (all drivers)
> diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
> index e089889..fcc5081 100644
> --- a/docs/relnotes/10.7.0.html
> +++ b/docs/relnotes/10.7.0.html
> @@ -44,8 +44,11 @@ Note: some of the new features are only available with certain drivers.
>  </p>
>
>  <ul>
> +<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
>  <li>GL_ARB_framebuffer_no_attachments on i965</li>
>  <li>GL_ARB_shader_stencil_export on llvmpipe</li>
> +<li>GL_ARB_viewport_array on radeonsi</li>
> +<li>GL_ARB_fragment_layer_viewport on radeonsi</li>
>  </ul>
>
>  <h2>Bug fixes</h2>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 1f2c408..6c7b383 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
>                 util_blitter_save_sample_mask(sctx->blitter,
>                                               sctx->queued.named.sample_mask->sample_mask);
>         }
> -       if (sctx->queued.named.viewport) {
> -               util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
> +       if (sctx->queued.named.viewport[0]) {
> +               util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
>         }
> -       if (sctx->queued.named.scissor) {
> -               util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
> +       if (sctx->queued.named.scissor[0]) {
> +               util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
>         }
>         util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
>         util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 53ae71a..480a301 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>                 return 8;
>
>         case PIPE_CAP_MAX_VIEWPORTS:
> -               return 1;
> +               return 16;
>
>         /* Timer queries, present when the clock frequency is non zero. */
>         case PIPE_CAP_QUERY_TIMESTAMP:
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 47e5f96..87608a1 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -125,12 +125,16 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>                 return 0;
>         case TGSI_SEMANTIC_PSIZE:
>                 return 1;
> +       case TGSI_SEMANTIC_LAYER:
> +               return 2;
> +       case TGSI_SEMANTIC_VIEWPORT_INDEX:
> +               return 3;
>         case TGSI_SEMANTIC_CLIPDIST:
>                 assert(index <= 1);
> -               return 2 + index;
> -       case TGSI_SEMANTIC_GENERIC:
> -               assert(index <= 63-4);
>                 return 4 + index;
> +       case TGSI_SEMANTIC_GENERIC:
> +               assert(index <= 63-6);
> +               return 6 + index;
>
>         default:
>                 assert(0);
> @@ -1128,7 +1132,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
>                                 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
>         LLVMValueRef args[9];
>         LLVMValueRef pos_args[4][9] = { { 0 } };
> -       LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
> +       LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
>         unsigned semantic_name, semantic_index;
>         unsigned target;
>         unsigned param_count = 0;
> @@ -1154,7 +1158,12 @@ handle_semantic:
>                         continue;
>                 case TGSI_SEMANTIC_LAYER:
>                         layer_value = outputs[i].values[0];
> -                       continue;
> +                       semantic_name = TGSI_SEMANTIC_GENERIC;
> +                       goto handle_semantic;
> +               case TGSI_SEMANTIC_VIEWPORT_INDEX:
> +                       viewport_index_value = outputs[i].values[0];
> +                       semantic_name = TGSI_SEMANTIC_GENERIC;
> +                       goto handle_semantic;
>                 case TGSI_SEMANTIC_POSITION:
>                         target = V_008DFC_SQ_EXP_POS;
>                         break;
> @@ -1220,11 +1229,13 @@ handle_semantic:
>         /* Write the misc vector (point size, edgeflag, layer, viewport). */
>         if (shader->selector->info.writes_psize ||
>             shader->selector->info.writes_edgeflag ||
> +           shader->selector->info.writes_viewport_index ||
>             shader->selector->info.writes_layer) {
>                 pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
>                                                       shader->selector->info.writes_psize |
>                                                       (shader->selector->info.writes_edgeflag << 1) |
> -                                                     (shader->selector->info.writes_layer << 2));
> +                                                     (shader->selector->info.writes_layer << 2) |
> +                                                     (shader->selector->info.writes_viewport_index << 3));
>                 pos_args[1][1] = uint->zero; /* EXEC mask */
>                 pos_args[1][2] = uint->zero; /* last export? */
>                 pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
> @@ -1255,6 +1266,9 @@ handle_semantic:
>
>                 if (shader->selector->info.writes_layer)
>                         pos_args[1][7] = layer_value;
> +
> +               if (shader->selector->info.writes_viewport_index)
> +                       pos_args[1][8] = viewport_index_value;
>         }
>
>         for (i = 0; i < 4; i++)
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 6c18836..752467b 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -489,11 +489,13 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
>                 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
>                 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
>                 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
> +               S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
>                 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
>                 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
>                 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
>                                             info->writes_edgeflag ||
> -                                           info->writes_layer) |
> +                                           info->writes_layer ||
> +                                            info->writes_viewport_index) |
>                 (sctx->queued.named.rasterizer->clip_plane_enable &
>                  clipdist_mask));
>         r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
> @@ -509,20 +511,26 @@ static void si_set_scissor_states(struct pipe_context *ctx,
>                                    const struct pipe_scissor_state *state)
>  {
>         struct si_context *sctx = (struct si_context *)ctx;
> -       struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
> -       struct si_pm4_state *pm4 = &scissor->pm4;
> -
> -       if (scissor == NULL)
> -               return;
> +       struct si_state_scissor *scissor;
> +       struct si_pm4_state *pm4;
> +       int i;
>
> -       scissor->scissor = *state;
> -       si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
> -                      S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
> -                      S_028250_WINDOW_OFFSET_DISABLE(1));
> -       si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
> -                      S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
> +       for (i = start_slot; i < start_slot + num_scissors; i++) {
> +               int idx = i - start_slot;
> +               int offset = i * 4 * 2;
>
> -       si_pm4_set_state(sctx, scissor, scissor);
> +               scissor = CALLOC_STRUCT(si_state_scissor);
> +               if (scissor == NULL)
> +                       return;
> +               pm4 = &scissor->pm4;
> +               scissor->scissor = state[idx];
> +               si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
> +                              S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
> +                              S_028250_WINDOW_OFFSET_DISABLE(1));
> +               si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
> +                              S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
> +               si_pm4_set_state(sctx, scissor[i], scissor);
> +       }
>  }
>
>  static void si_set_viewport_states(struct pipe_context *ctx,
> @@ -531,21 +539,29 @@ static void si_set_viewport_states(struct pipe_context *ctx,
>                                     const struct pipe_viewport_state *state)
>  {
>         struct si_context *sctx = (struct si_context *)ctx;
> -       struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
> -       struct si_pm4_state *pm4 = &viewport->pm4;
> +       struct si_state_viewport *viewport;
> +       struct si_pm4_state *pm4;
> +       int i;
>
> -       if (viewport == NULL)
> -               return;
> +       for (i = start_slot; i < start_slot + num_viewports; i++) {
> +               int idx = i - start_slot;
> +               int offset = i * 4 * 6;
>
> -       viewport->viewport = *state;
> -       si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
> -       si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
> -       si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
> -       si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
> -       si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
> -       si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
> +               viewport = CALLOC_STRUCT(si_state_viewport);
> +               if (!viewport)
> +                       return;
> +               pm4 = &viewport->pm4;
> +
> +               viewport->viewport = state[idx];
> +               si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, fui(state[idx].scale[0]));
> +               si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0 + offset, fui(state[idx].translate[0]));
> +               si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0 + offset, fui(state[idx].scale[1]));
> +               si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0 + offset, fui(state[idx].translate[1]));
> +               si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0 + offset, fui(state[idx].scale[2]));
> +               si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0 + offset, fui(state[idx].translate[2]));
>
> -       si_pm4_set_state(sctx, viewport, viewport);
> +               si_pm4_set_state(sctx, viewport[i], viewport);
> +       }
>  }
>
>  /*
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 5e68b16..d1f2dff 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -92,8 +92,8 @@ union si_state {
>                 struct si_pm4_state             *blend_color;
>                 struct si_pm4_state             *clip;
>                 struct si_state_sample_mask     *sample_mask;
> -               struct si_state_scissor         *scissor;
> -               struct si_state_viewport        *viewport;
> +               struct si_state_scissor         *scissor[16];
> +               struct si_state_viewport        *viewport[16];
>                 struct si_state_rasterizer      *rasterizer;
>                 struct si_state_dsa             *dsa;
>                 struct si_pm4_state             *fb_rs;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 208c852..48128fa 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -187,8 +187,6 @@ static void si_shader_vs(struct si_shader *shader)
>                 case TGSI_SEMANTIC_POSITION:
>                 case TGSI_SEMANTIC_PSIZE:
>                 case TGSI_SEMANTIC_EDGEFLAG:
> -               case TGSI_SEMANTIC_VIEWPORT_INDEX:
> -               case TGSI_SEMANTIC_LAYER:
>                         break;
>                 default:
>                         nparams++;
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list