[Mesa-dev] [PATCH] radeonsi: implement ARB_shader_group_vote
Nicolai Hähnle
nhaehnle at gmail.com
Thu Mar 30 08:48:50 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
docs/features.txt | 2 +-
docs/relnotes/17.1.0.html | 1 +
src/gallium/drivers/radeonsi/si_pipe.c | 4 +-
src/gallium/drivers/radeonsi/si_shader.c | 82 ++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+), 2 deletions(-)
diff --git a/docs/features.txt b/docs/features.txt
index d707f01..1e145e1 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -288,21 +288,21 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_parallel_shader_compile not started, but Chia-I Wu did some related work in 2014
GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, softpipe, swr)
GL_ARB_post_depth_coverage DONE (i965)
GL_ARB_robustness_isolation not started
GL_ARB_sample_locations not started
GL_ARB_seamless_cubemap_per_texture DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, radeonsi, softpipe)
GL_ARB_shader_ballot not started
GL_ARB_shader_clock DONE (i965/gen7+, radeonsi)
GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi)
- GL_ARB_shader_group_vote DONE (nvc0)
+ GL_ARB_shader_group_vote DONE (nvc0, radeonsi)
GL_ARB_shader_stencil_export DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
GL_ARB_shader_viewport_layer_array DONE (i965/gen6+)
GL_ARB_sparse_buffer not started
GL_ARB_sparse_texture not started
GL_ARB_sparse_texture2 not started
GL_ARB_sparse_texture_clamp not started
GL_ARB_texture_filter_minmax not started
GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+)
GL_KHR_blend_equation_advanced_coherent DONE (i965/gen9+)
GL_KHR_no_error not started
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index 52b35b5..38bc1e8 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -39,20 +39,21 @@ TBD.
<h2>New features</h2>
<p>
Note: some of the new features are only available with certain drivers.
</p>
<ul>
<li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
<li>GL_ARB_shader_clock on radeonsi</li>
+<li>GL_ARB_shader_group_vote on radeonsi</li>
<li>GL_ARB_transform_feedback2 on i965/gen6</li>
<li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
<li>Geometry shaders enabled on swr</li>
</ul>
<h2>Bug fixes</h2>
<ul>
</ul>
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6944c7c..688900e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -417,20 +417,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
return 1;
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_CLOCK:
return HAVE_LLVM >= 0x0309;
+ case PIPE_CAP_TGSI_VOTE:
+ return HAVE_LLVM >= 0x0400;
+
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return (sscreen->b.info.drm_major == 2 &&
sscreen->b.info.drm_minor >= 43) ||
sscreen->b.info.drm_major == 3;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
/* 2D tiling on CIK is supported since DRM 2.35.0 */
@@ -471,21 +474,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* Unsupported features. */
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
- case PIPE_CAP_TGSI_VOTE:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_UMA:
return 0;
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return si_have_tgsi_compute(sscreen);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 415d13b..737d005 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5058,20 +5058,98 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
llvm_chan, attr_number, params,
i, j);
} else {
emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac,
lp_build_const_int32(gallivm, 2), /* P0 */
llvm_chan, attr_number, params);
}
}
}
+static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
+ LLVMValueRef value)
+{
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef args[3] = {
+ value,
+ ctx->i32_0,
+ LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+ };
+
+ if (LLVMTypeOf(value) != ctx->i32)
+ args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+
+ return lp_build_intrinsic(gallivm->builder,
+ "llvm.amdgcn.icmp.i32",
+ ctx->i64, args, 3,
+ LP_FUNC_ATTR_NOUNWIND |
+ LP_FUNC_ATTR_READNONE |
+ LP_FUNC_ATTR_CONVERGENT);
+}
+
+static void vote_all_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef active_set, vote_set;
+ LLVMValueRef tmp;
+
+ active_set = si_emit_ballot(ctx, ctx->i32_1);
+ vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+ tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
+static void vote_any_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef vote_set;
+ LLVMValueRef tmp;
+
+ vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+ tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+ vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
+static void vote_eq_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef active_set, vote_set;
+ LLVMValueRef all, none, tmp;
+
+ active_set = si_emit_ballot(ctx, ctx->i32_1);
+ vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+ all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+ none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+ tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
LLVMValueRef imm;
unsigned stream;
assert(src0.File == TGSI_FILE_IMMEDIATE);
@@ -6501,20 +6579,24 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
+ bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
+ bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
}
#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
/* Return true if the PARAM export has been eliminated. */
static bool si_eliminate_const_output(struct si_shader_context *ctx,
--
2.9.3
More information about the mesa-dev
mailing list