[Mesa-dev] [PATCH] radeonsi: implement ARB_shader_group_vote
Marek Olšák
maraeo at gmail.com
Thu Mar 30 22:01:45 UTC 2017
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Thu, Mar 30, 2017 at 10:48 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> ---
> docs/features.txt | 2 +-
> docs/relnotes/17.1.0.html | 1 +
> src/gallium/drivers/radeonsi/si_pipe.c | 4 +-
> src/gallium/drivers/radeonsi/si_shader.c | 82 ++++++++++++++++++++++++++++++++
> 4 files changed, 87 insertions(+), 2 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index d707f01..1e145e1 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -288,21 +288,21 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
> GL_ARB_parallel_shader_compile not started, but Chia-I Wu did some related work in 2014
> GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, softpipe, swr)
> GL_ARB_post_depth_coverage DONE (i965)
> GL_ARB_robustness_isolation not started
> GL_ARB_sample_locations not started
> GL_ARB_seamless_cubemap_per_texture DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
> GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, radeonsi, softpipe)
> GL_ARB_shader_ballot not started
> GL_ARB_shader_clock DONE (i965/gen7+, radeonsi)
> GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi)
> - GL_ARB_shader_group_vote DONE (nvc0)
> + GL_ARB_shader_group_vote DONE (nvc0, radeonsi)
> GL_ARB_shader_stencil_export DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
> GL_ARB_shader_viewport_layer_array DONE (i965/gen6+)
> GL_ARB_sparse_buffer not started
> GL_ARB_sparse_texture not started
> GL_ARB_sparse_texture2 not started
> GL_ARB_sparse_texture_clamp not started
> GL_ARB_texture_filter_minmax not started
> GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+)
> GL_KHR_blend_equation_advanced_coherent DONE (i965/gen9+)
> GL_KHR_no_error not started
> diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
> index 52b35b5..38bc1e8 100644
> --- a/docs/relnotes/17.1.0.html
> +++ b/docs/relnotes/17.1.0.html
> @@ -39,20 +39,21 @@ TBD.
>
> <h2>New features</h2>
>
> <p>
> Note: some of the new features are only available with certain drivers.
> </p>
>
> <ul>
> <li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
> <li>GL_ARB_shader_clock on radeonsi</li>
> +<li>GL_ARB_shader_group_vote on radeonsi</li>
> <li>GL_ARB_transform_feedback2 on i965/gen6</li>
> <li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
> <li>Geometry shaders enabled on swr</li>
> </ul>
>
> <h2>Bug fixes</h2>
>
> <ul>
> </ul>
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 6944c7c..688900e 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -417,20 +417,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
> case PIPE_CAP_DOUBLES:
> case PIPE_CAP_TGSI_TEX_TXF_LZ:
> return 1;
>
> case PIPE_CAP_INT64:
> case PIPE_CAP_INT64_DIVMOD:
> case PIPE_CAP_TGSI_CLOCK:
> return HAVE_LLVM >= 0x0309;
>
> + case PIPE_CAP_TGSI_VOTE:
> + return HAVE_LLVM >= 0x0400;
> +
> case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
> return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
>
> case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
> return (sscreen->b.info.drm_major == 2 &&
> sscreen->b.info.drm_minor >= 43) ||
> sscreen->b.info.drm_major == 3;
>
> case PIPE_CAP_TEXTURE_MULTISAMPLE:
> /* 2D tiling on CIK is supported since DRM 2.35.0 */
> @@ -471,21 +474,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>
> /* Unsupported features. */
> case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
> case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
> case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
> case PIPE_CAP_USER_VERTEX_BUFFERS:
> case PIPE_CAP_FAKE_SW_MSAA:
> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
> case PIPE_CAP_VERTEXID_NOBASE:
> case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
> - case PIPE_CAP_TGSI_VOTE:
> case PIPE_CAP_MAX_WINDOW_RECTANGLES:
> case PIPE_CAP_NATIVE_FENCE_FD:
> case PIPE_CAP_TGSI_FS_FBFETCH:
> case PIPE_CAP_TGSI_MUL_ZERO_WINS:
> case PIPE_CAP_UMA:
> return 0;
>
> case PIPE_CAP_QUERY_BUFFER_OBJECT:
> return si_have_tgsi_compute(sscreen);
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 415d13b..737d005 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -5058,20 +5058,98 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
> llvm_chan, attr_number, params,
> i, j);
> } else {
> emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac,
> lp_build_const_int32(gallivm, 2), /* P0 */
> llvm_chan, attr_number, params);
> }
> }
> }
>
> +static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
> + LLVMValueRef value)
> +{
> + struct gallivm_state *gallivm = &ctx->gallivm;
> + LLVMValueRef args[3] = {
> + value,
> + ctx->i32_0,
> + LLVMConstInt(ctx->i32, LLVMIntNE, 0)
> + };
> +
> + if (LLVMTypeOf(value) != ctx->i32)
> + args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
> +
> + return lp_build_intrinsic(gallivm->builder,
> + "llvm.amdgcn.icmp.i32",
> + ctx->i64, args, 3,
> + LP_FUNC_ATTR_NOUNWIND |
> + LP_FUNC_ATTR_READNONE |
> + LP_FUNC_ATTR_CONVERGENT);
> +}
> +
> +static void vote_all_emit(
> + const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct si_shader_context *ctx = si_shader_context(bld_base);
> + struct gallivm_state *gallivm = &ctx->gallivm;
> + LLVMValueRef active_set, vote_set;
> + LLVMValueRef tmp;
> +
> + active_set = si_emit_ballot(ctx, ctx->i32_1);
> + vote_set = si_emit_ballot(ctx, emit_data->args[0]);
> +
> + tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
> +}
> +
> +static void vote_any_emit(
> + const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct si_shader_context *ctx = si_shader_context(bld_base);
> + struct gallivm_state *gallivm = &ctx->gallivm;
> + LLVMValueRef vote_set;
> + LLVMValueRef tmp;
> +
> + vote_set = si_emit_ballot(ctx, emit_data->args[0]);
> +
> + tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
> + vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
> +}
> +
> +static void vote_eq_emit(
> + const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct si_shader_context *ctx = si_shader_context(bld_base);
> + struct gallivm_state *gallivm = &ctx->gallivm;
> + LLVMValueRef active_set, vote_set;
> + LLVMValueRef all, none, tmp;
> +
> + active_set = si_emit_ballot(ctx, ctx->i32_1);
> + vote_set = si_emit_ballot(ctx, emit_data->args[0]);
> +
> + all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
> + none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
> + vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
> + tmp = LLVMBuildOr(gallivm->builder, all, none, "");
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
> +}
> +
> static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
> LLVMValueRef imm;
> unsigned stream;
>
> assert(src0.File == TGSI_FILE_IMMEDIATE);
>
> @@ -6501,20 +6579,24 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
>
> bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
>
> bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
>
> bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
> bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
> bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
> bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
>
> + bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
> + bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
> + bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
> +
> bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
> bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
> bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
> }
>
> #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
> #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
>
> /* Return true if the PARAM export has been eliminated. */
> static bool si_eliminate_const_output(struct si_shader_context *ctx,
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list