[Mesa-dev] [PATCH] radeonsi: implement ARB_shader_group_vote

Nicolai Hähnle nhaehnle at gmail.com
Thu Mar 30 08:48:50 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 docs/features.txt                        |  2 +-
 docs/relnotes/17.1.0.html                |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  4 +-
 src/gallium/drivers/radeonsi/si_shader.c | 82 ++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index d707f01..1e145e1 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -288,21 +288,21 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
   GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
   GL_ARB_pipeline_statistics_query                      DONE (i965, nvc0, radeonsi, softpipe, swr)
   GL_ARB_post_depth_coverage                            DONE (i965)
   GL_ARB_robustness_isolation                           not started
   GL_ARB_sample_locations                               not started
   GL_ARB_seamless_cubemap_per_texture                   DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
   GL_ARB_shader_atomic_counter_ops                      DONE (i965/gen7+, nvc0, radeonsi, softpipe)
   GL_ARB_shader_ballot                                  not started
   GL_ARB_shader_clock                                   DONE (i965/gen7+, radeonsi)
   GL_ARB_shader_draw_parameters                         DONE (i965, nvc0, radeonsi)
-  GL_ARB_shader_group_vote                              DONE (nvc0)
+  GL_ARB_shader_group_vote                              DONE (nvc0, radeonsi)
   GL_ARB_shader_stencil_export                          DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
   GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+)
   GL_ARB_sparse_buffer                                  not started
   GL_ARB_sparse_texture                                 not started
   GL_ARB_sparse_texture2                                not started
   GL_ARB_sparse_texture_clamp                           not started
   GL_ARB_texture_filter_minmax                          not started
   GL_ARB_transform_feedback_overflow_query              DONE (i965/gen6+)
   GL_KHR_blend_equation_advanced_coherent               DONE (i965/gen9+)
   GL_KHR_no_error                                       not started
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index 52b35b5..38bc1e8 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -39,20 +39,21 @@ TBD.
 
 <h2>New features</h2>
 
 <p>
 Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
 <li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
 <li>GL_ARB_shader_clock on radeonsi</li>
+<li>GL_ARB_shader_group_vote on radeonsi</li>
 <li>GL_ARB_transform_feedback2 on i965/gen6</li>
 <li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
 <li>Geometry shaders enabled on swr</li>
 </ul>
 
 <h2>Bug fixes</h2>
 
 <ul>
 </ul>
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6944c7c..688900e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -417,20 +417,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
 	case PIPE_CAP_DOUBLES:
 	case PIPE_CAP_TGSI_TEX_TXF_LZ:
 		return 1;
 
 	case PIPE_CAP_INT64:
 	case PIPE_CAP_INT64_DIVMOD:
 	case PIPE_CAP_TGSI_CLOCK:
 		return HAVE_LLVM >= 0x0309;
 
+	case PIPE_CAP_TGSI_VOTE:
+		return HAVE_LLVM >= 0x0400;
+
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 		return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
 
 	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
 		return (sscreen->b.info.drm_major == 2 &&
 			sscreen->b.info.drm_minor >= 43) ||
 		       sscreen->b.info.drm_major == 3;
 
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 		/* 2D tiling on CIK is supported since DRM 2.35.0 */
@@ -471,21 +474,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
 	/* Unsupported features. */
 	case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-	case PIPE_CAP_TGSI_VOTE:
 	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
 	case PIPE_CAP_NATIVE_FENCE_FD:
 	case PIPE_CAP_TGSI_FS_FBFETCH:
 	case PIPE_CAP_TGSI_MUL_ZERO_WINS:
 	case PIPE_CAP_UMA:
 		return 0;
 
 	case PIPE_CAP_QUERY_BUFFER_OBJECT:
 		return si_have_tgsi_compute(sscreen);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 415d13b..737d005 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5058,20 +5058,98 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 				llvm_chan, attr_number, params,
 				i, j);
 		} else {
 			emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac,
 				lp_build_const_int32(gallivm, 2), /* P0 */
 				llvm_chan, attr_number, params);
 		}
 	}
 }
 
+static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
+				   LLVMValueRef value)
+{ /* Emits a call to llvm.amdgcn.icmp.i32(value, 0, NE) and returns its i64 result. */
+	struct gallivm_state *gallivm = &ctx->gallivm;
+	LLVMValueRef args[3] = {
+		value, /* first compare operand; swapped for an i32 bitcast below if needed */
+		ctx->i32_0, /* second compare operand: constant zero */
+		LLVMConstInt(ctx->i32, LLVMIntNE, 0) /* predicate passed as an i32 constant: "not equal" */
+	};
+
+	if (LLVMTypeOf(value) != ctx->i32) /* the .i32 intrinsic variant is called, so hand it an i32 */
+		args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+
+	return lp_build_intrinsic(gallivm->builder,
+				  "llvm.amdgcn.icmp.i32",
+				  ctx->i64, args, 3, /* i64 return; presumably one bit per lane -- TODO confirm against LLVM AMDGPU docs */
+				  LP_FUNC_ATTR_NOUNWIND |
+				  LP_FUNC_ATTR_READNONE |
+				  LP_FUNC_ATTR_CONVERGENT);
+}
+
+static void vote_all_emit(
+	const struct lp_build_tgsi_action *action,
+	struct lp_build_tgsi_context *bld_base,
+	struct lp_build_emit_data *emit_data)
+{ /* Emit handler registered for TGSI_OPCODE_VOTE_ALL: true when both ballots below are equal. */
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = &ctx->gallivm;
+	LLVMValueRef active_set, vote_set;
+	LLVMValueRef tmp;
+
+	active_set = si_emit_ballot(ctx, ctx->i32_1); /* ballot of constant 1; presumably the active-lane mask -- confirm */
+	vote_set = si_emit_ballot(ctx, emit_data->args[0]); /* ballot of the instruction's first argument */
+
+	tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, ""); /* sign-extend the i1 compare result to i32 */
+}
+
+static void vote_any_emit(
+	const struct lp_build_tgsi_action *action,
+	struct lp_build_tgsi_context *bld_base,
+	struct lp_build_emit_data *emit_data)
+{ /* Emit handler registered for TGSI_OPCODE_VOTE_ANY: true when the argument's ballot is non-zero. */
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = &ctx->gallivm;
+	LLVMValueRef vote_set;
+	LLVMValueRef tmp;
+
+	vote_set = si_emit_ballot(ctx, emit_data->args[0]); /* ballot of the instruction's first argument */
+
+	tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+			    vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, ""); /* sign-extend the i1 compare result to i32 */
+}
+
+static void vote_eq_emit(
+	const struct lp_build_tgsi_action *action,
+	struct lp_build_tgsi_context *bld_base,
+	struct lp_build_emit_data *emit_data)
+{ /* Emit handler registered for TGSI_OPCODE_VOTE_EQ: true when the argument's ballot is all-or-nothing. */
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = &ctx->gallivm;
+	LLVMValueRef active_set, vote_set;
+	LLVMValueRef all, none, tmp;
+
+	active_set = si_emit_ballot(ctx, ctx->i32_1); /* ballot of constant 1; presumably the active-lane mask -- confirm */
+	vote_set = si_emit_ballot(ctx, emit_data->args[0]); /* ballot of the instruction's first argument */
+
+	all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, ""); /* ballot equals the constant-1 ballot */
+	none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+			     vote_set, LLVMConstInt(ctx->i64, 0, 0), ""); /* ballot is zero */
+	tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, ""); /* sign-extend the i1 result to i32 */
+}
+
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
 				       struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
 	LLVMValueRef imm;
 	unsigned stream;
 
 	assert(src0.File == TGSI_FILE_IMMEDIATE);
 
@@ -6501,20 +6579,24 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 
 	bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
 
 	bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
 
 	bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
 	bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
 	bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
 	bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
+	bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
+	bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
+	bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+
 	bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
 	bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
 	bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
 #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
 #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
 
 /* Return true if the PARAM export has been eliminated. */
 static bool si_eliminate_const_output(struct si_shader_context *ctx,
-- 
2.9.3



More information about the mesa-dev mailing list