[Mesa-dev] [PATCH] radeonsi: implement ARB_shader_group_vote

Marek Olšák maraeo at gmail.com
Thu Mar 30 22:01:45 UTC 2017


Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Thu, Mar 30, 2017 at 10:48 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> ---
>  docs/features.txt                        |  2 +-
>  docs/relnotes/17.1.0.html                |  1 +
>  src/gallium/drivers/radeonsi/si_pipe.c   |  4 +-
>  src/gallium/drivers/radeonsi/si_shader.c | 82 ++++++++++++++++++++++++++++++++
>  4 files changed, 87 insertions(+), 2 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index d707f01..1e145e1 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -288,21 +288,21 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
>    GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
>    GL_ARB_pipeline_statistics_query                      DONE (i965, nvc0, radeonsi, softpipe, swr)
>    GL_ARB_post_depth_coverage                            DONE (i965)
>    GL_ARB_robustness_isolation                           not started
>    GL_ARB_sample_locations                               not started
>    GL_ARB_seamless_cubemap_per_texture                   DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
>    GL_ARB_shader_atomic_counter_ops                      DONE (i965/gen7+, nvc0, radeonsi, softpipe)
>    GL_ARB_shader_ballot                                  not started
>    GL_ARB_shader_clock                                   DONE (i965/gen7+, radeonsi)
>    GL_ARB_shader_draw_parameters                         DONE (i965, nvc0, radeonsi)
> -  GL_ARB_shader_group_vote                              DONE (nvc0)
> +  GL_ARB_shader_group_vote                              DONE (nvc0, radeonsi)
>    GL_ARB_shader_stencil_export                          DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
>    GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+)
>    GL_ARB_sparse_buffer                                  not started
>    GL_ARB_sparse_texture                                 not started
>    GL_ARB_sparse_texture2                                not started
>    GL_ARB_sparse_texture_clamp                           not started
>    GL_ARB_texture_filter_minmax                          not started
>    GL_ARB_transform_feedback_overflow_query              DONE (i965/gen6+)
>    GL_KHR_blend_equation_advanced_coherent               DONE (i965/gen9+)
>    GL_KHR_no_error                                       not started
> diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
> index 52b35b5..38bc1e8 100644
> --- a/docs/relnotes/17.1.0.html
> +++ b/docs/relnotes/17.1.0.html
> @@ -39,20 +39,21 @@ TBD.
>
>  <h2>New features</h2>
>
>  <p>
>  Note: some of the new features are only available with certain drivers.
>  </p>
>
>  <ul>
>  <li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
>  <li>GL_ARB_shader_clock on radeonsi</li>
> +<li>GL_ARB_shader_group_vote on radeonsi</li>
>  <li>GL_ARB_transform_feedback2 on i965/gen6</li>
>  <li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
>  <li>Geometry shaders enabled on swr</li>
>  </ul>
>
>  <h2>Bug fixes</h2>
>
>  <ul>
>  </ul>
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 6944c7c..688900e 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -417,20 +417,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>         case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
>         case PIPE_CAP_DOUBLES:
>         case PIPE_CAP_TGSI_TEX_TXF_LZ:
>                 return 1;
>
>         case PIPE_CAP_INT64:
>         case PIPE_CAP_INT64_DIVMOD:
>         case PIPE_CAP_TGSI_CLOCK:
>                 return HAVE_LLVM >= 0x0309;
>
> +       case PIPE_CAP_TGSI_VOTE:
> +               return HAVE_LLVM >= 0x0400;
> +
>         case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
>                 return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
>
>         case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
>                 return (sscreen->b.info.drm_major == 2 &&
>                         sscreen->b.info.drm_minor >= 43) ||
>                        sscreen->b.info.drm_major == 3;
>
>         case PIPE_CAP_TEXTURE_MULTISAMPLE:
>                 /* 2D tiling on CIK is supported since DRM 2.35.0 */
> @@ -471,21 +474,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>
>         /* Unsupported features. */
>         case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
>         case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
>         case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
>         case PIPE_CAP_USER_VERTEX_BUFFERS:
>         case PIPE_CAP_FAKE_SW_MSAA:
>         case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>         case PIPE_CAP_VERTEXID_NOBASE:
>         case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
> -       case PIPE_CAP_TGSI_VOTE:
>         case PIPE_CAP_MAX_WINDOW_RECTANGLES:
>         case PIPE_CAP_NATIVE_FENCE_FD:
>         case PIPE_CAP_TGSI_FS_FBFETCH:
>         case PIPE_CAP_TGSI_MUL_ZERO_WINS:
>         case PIPE_CAP_UMA:
>                 return 0;
>
>         case PIPE_CAP_QUERY_BUFFER_OBJECT:
>                 return si_have_tgsi_compute(sscreen);
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 415d13b..737d005 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -5058,20 +5058,98 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
>                                 llvm_chan, attr_number, params,
>                                 i, j);
>                 } else {
>                         emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac,
>                                 lp_build_const_int32(gallivm, 2), /* P0 */
>                                 llvm_chan, attr_number, params);
>                 }
>         }
>  }
>
> +static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
> +                                  LLVMValueRef value)
> +{
> +       struct gallivm_state *gallivm = &ctx->gallivm;
> +       LLVMValueRef args[3] = {
> +               value,
> +               ctx->i32_0,
> +               LLVMConstInt(ctx->i32, LLVMIntNE, 0)
> +       };
> +
> +       if (LLVMTypeOf(value) != ctx->i32)
> +               args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
> +
> +       return lp_build_intrinsic(gallivm->builder,
> +                                 "llvm.amdgcn.icmp.i32",
> +                                 ctx->i64, args, 3,
> +                                 LP_FUNC_ATTR_NOUNWIND |
> +                                 LP_FUNC_ATTR_READNONE |
> +                                 LP_FUNC_ATTR_CONVERGENT);
> +}
> +
> +static void vote_all_emit(
> +       const struct lp_build_tgsi_action *action,
> +       struct lp_build_tgsi_context *bld_base,
> +       struct lp_build_emit_data *emit_data)
> +{
> +       struct si_shader_context *ctx = si_shader_context(bld_base);
> +       struct gallivm_state *gallivm = &ctx->gallivm;
> +       LLVMValueRef active_set, vote_set;
> +       LLVMValueRef tmp;
> +
> +       active_set = si_emit_ballot(ctx, ctx->i32_1);
> +       vote_set = si_emit_ballot(ctx, emit_data->args[0]);
> +
> +       tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
> +       emit_data->output[emit_data->chan] =
> +               LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
> +}
> +
> +static void vote_any_emit(
> +       const struct lp_build_tgsi_action *action,
> +       struct lp_build_tgsi_context *bld_base,
> +       struct lp_build_emit_data *emit_data)
> +{
> +       struct si_shader_context *ctx = si_shader_context(bld_base);
> +       struct gallivm_state *gallivm = &ctx->gallivm;
> +       LLVMValueRef vote_set;
> +       LLVMValueRef tmp;
> +
> +       vote_set = si_emit_ballot(ctx, emit_data->args[0]);
> +
> +       tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
> +                           vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
> +       emit_data->output[emit_data->chan] =
> +               LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
> +}
> +
> +static void vote_eq_emit(
> +       const struct lp_build_tgsi_action *action,
> +       struct lp_build_tgsi_context *bld_base,
> +       struct lp_build_emit_data *emit_data)
> +{
> +       struct si_shader_context *ctx = si_shader_context(bld_base);
> +       struct gallivm_state *gallivm = &ctx->gallivm;
> +       LLVMValueRef active_set, vote_set;
> +       LLVMValueRef all, none, tmp;
> +
> +       active_set = si_emit_ballot(ctx, ctx->i32_1);
> +       vote_set = si_emit_ballot(ctx, emit_data->args[0]);
> +
> +       all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
> +       none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
> +                            vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
> +       tmp = LLVMBuildOr(gallivm->builder, all, none, "");
> +       emit_data->output[emit_data->chan] =
> +               LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
> +}
> +
>  static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
>                                        struct lp_build_emit_data *emit_data)
>  {
>         struct si_shader_context *ctx = si_shader_context(bld_base);
>         struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
>         LLVMValueRef imm;
>         unsigned stream;
>
>         assert(src0.File == TGSI_FILE_IMMEDIATE);
>
> @@ -6501,20 +6579,24 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
>
>         bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
>
>         bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
>
>         bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
>         bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
>         bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
>         bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
>
> +       bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
> +       bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
> +       bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
> +
>         bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
>         bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
>         bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
>  }
>
>  #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
>  #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
>
>  /* Return true if the PARAM export has been eliminated. */
>  static bool si_eliminate_const_output(struct si_shader_context *ctx,
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list