Mesa (main): ac/llvm: add a callback to ac_cull_triangle to generate code in inner-most block

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sun Jun 20 05:43:00 UTC 2021


Module: Mesa
Branch: main
Commit: 12d2df15f178591d45193b6cc3e093281e2ea0aa
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=12d2df15f178591d45193b6cc3e093281e2ea0aa

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Jun  1 01:09:57 2021 -0400

ac/llvm: add a callback to ac_cull_triangle to generate code in inner-most block

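This will reduce jumps in culling code: a caller can pass a callback that
emits its code inside the inner-most block ac_cull_triangle already
generates, instead of opening another branch on the returned value.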

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11486>

---

 src/amd/llvm/ac_llvm_cull.c                            | 13 ++++++++++---
 src/amd/llvm/ac_llvm_cull.h                            |  7 ++++++-
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c        |  2 +-
 src/gallium/drivers/radeonsi/si_compute_prim_discard.c |  3 ++-
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_cull.c b/src/amd/llvm/ac_llvm_cull.c
index 3c185d69650..028e125b386 100644
--- a/src/amd/llvm/ac_llvm_cull.c
+++ b/src/amd/llvm/ac_llvm_cull.c
@@ -120,7 +120,8 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
                               LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
                               LLVMValueRef small_prim_precision, bool cull_view_xy,
                               bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
-                              bool use_halfz_clip_space)
+                              bool use_halfz_clip_space, ac_cull_accept_func accept_func,
+                              void *userdata)
 {
    LLVMBuilderRef builder = ctx->builder;
 
@@ -200,6 +201,9 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
          accepted = LLVMBuildAnd(builder, accepted, visible, "");
       }
 
+      if (accept_func)
+         accept_func(ctx, accepted, userdata);
+
       LLVMBuildStore(builder, accepted, accepted_var);
    }
    ac_build_endif(ctx, 10000000);
@@ -222,11 +226,13 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
  *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
  *                              subpixel_bits are defined by the quantization mode.
  * \param options               See ac_cull_options.
+ * \param accept_func           Callback invoked in the inner-most branch where the primitive is accepted.
  */
 LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                               LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
                               LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
-                              struct ac_cull_options *options)
+                              struct ac_cull_options *options, ac_cull_accept_func accept_func,
+                              void *userdata)
 {
    struct ac_position_w_info w;
    ac_analyze_position_w(ctx, pos, &w);
@@ -244,6 +250,7 @@ LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
    /* View culling and small primitive elimination. */
    accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
                         options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
-                        options->cull_small_prims, options->use_halfz_clip_space);
+                        options->cull_small_prims, options->use_halfz_clip_space, accept_func,
+                        userdata);
    return accepted;
 }
diff --git a/src/amd/llvm/ac_llvm_cull.h b/src/amd/llvm/ac_llvm_cull.h
index 2c4b7f7da05..5e35111733f 100644
--- a/src/amd/llvm/ac_llvm_cull.h
+++ b/src/amd/llvm/ac_llvm_cull.h
@@ -48,9 +48,14 @@ struct ac_cull_options {
    bool use_halfz_clip_space;
 };
 
+/* Callback invoked in the inner-most branch where the primitive is accepted. */
+typedef void (*ac_cull_accept_func)(struct ac_llvm_context *ctx, LLVMValueRef accepted,
+                                    void *userdata);
+
 LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                               LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
                               LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
-                              struct ac_cull_options *options);
+                              struct ac_cull_options *options, ac_cull_accept_func accept_func,
+                              void *userdata);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index d72c72e748a..0bcd8b4b3d9 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -985,7 +985,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
       /* Tell ES threads whether their vertex survived. */
       ac_build_ifcc(&ctx->ac,
                     ac_cull_triangle(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
-                                     small_prim_precision, &options),
+                                     small_prim_precision, &options, NULL, NULL),
                     16003);
       {
          LLVMBuildStore(builder, ctx->ac.i32_1, gs_accepted);
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index c830a857f66..a75086606f4 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -673,7 +673,8 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
 
    LLVMValueRef accepted =
       ac_cull_triangle(&ctx->ac, pos, prim_restart_accepted, vp_scale, vp_translate,
-                       ac_get_arg(&ctx->ac, param_smallprim_precision), &options);
+                       ac_get_arg(&ctx->ac, param_smallprim_precision), &options,
+                       NULL, NULL);
 
    ac_build_optimization_barrier(&ctx->ac, &accepted, false);
    LLVMValueRef accepted_threadmask = ac_get_i1_sgpr_mask(&ctx->ac, accepted);



More information about the mesa-commit mailing list