Mesa (main): radeonsi: skip buffer_atomic_add(ptr, n) when n=0 in the prim discard CS
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jun 21 19:17:04 UTC 2021
Module: Mesa
Branch: main
Commit: 4b5eb336e18a8ccc8425e107e316a239a1f4f943
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b5eb336e18a8ccc8425e107e316a239a1f4f943
Author: Marek Olšák <marek.olsak at amd.com>
Date: Tue Jun 1 00:35:34 2021 -0400
radeonsi: skip buffer_atomic_add(ptr, n) when n=0 in the prim discard CS
This improves performance of the shader nicely.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11102>
---
src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 61b42c92990..54b58bfadc0 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -261,7 +261,8 @@ struct si_thread0_section {
/* Enter a section that only executes on thread 0. */
static void si_enter_thread0_section(struct si_shader_context *ctx,
- struct si_thread0_section *section, LLVMValueRef thread_id)
+ struct si_thread0_section *section, LLVMValueRef thread_id,
+ LLVMValueRef check_nonzero)
{
section->ctx = ctx;
section->vgpr_result = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "result0");
@@ -274,8 +275,13 @@ static void si_enter_thread0_section(struct si_shader_context *ctx,
*
* It could just be s_and_saveexec_b64 s, 1.
*/
- ac_build_ifcc(&ctx->ac, LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, thread_id, ctx->ac.i32_0, ""),
- 12601);
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, thread_id, ctx->ac.i32_0, "");
+ if (check_nonzero) {
+ cond = LLVMBuildAnd(ctx->ac.builder, cond,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, check_nonzero,
+ ctx->ac.i32_0, ""), "");
+ }
+ ac_build_ifcc(&ctx->ac, cond, 12601);
}
/* Exit a section that only executes on thread 0 and broadcast the result
@@ -537,7 +543,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
last_strip_start = LLVMBuildAdd(builder, last_strip_start, ctx->ac.i32_1, "");
struct si_thread0_section section;
- si_enter_thread0_section(ctx, &section, thread_id);
+ si_enter_thread0_section(ctx, &section, thread_id, NULL);
/* This must be done in the thread 0 section, because
* we expect PrimID to be 0 for the whole first wave
@@ -664,7 +670,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
/* Execute atomic_add on the vertex count. */
struct si_thread0_section section;
- si_enter_thread0_section(ctx, &section, thread_id);
+ si_enter_thread0_section(ctx, &section, thread_id, num_prims_accepted);
{
if (VERTEX_COUNTER_GDS_MODE == 0) {
LLVMValueRef num_indices = LLVMBuildMul(
More information about the mesa-commit
mailing list