Mesa (main): radeonsi: separate culling code from VS/TES (to be reused by GS)

Sat Nov 20 00:33:41 UTC 2021

Module: Mesa
Branch: main
Commit: 2418da2d4afe5534ae1bc44a24054d43cf9de0a5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2418da2d4afe5534ae1bc44a24054d43cf9de0a5

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Nov 16 19:38:41 2021 -0500

radeonsi: separate culling code from VS/TES (to be reused by GS)

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13829>

---

 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 131 +++++++++++++-----------
 1 file changed, 72 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 6d3bc0bba3a..dee70769bfc 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -799,9 +799,12 @@ static void gfx10_build_primitive_accepted(struct ac_llvm_context *ac, LLVMValue
 
    ac_build_ifcc(&ctx->ac, accepted, 0);
    LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_1, gs_accepted);
-   for (unsigned vtx = 0; vtx < num_vertices; vtx++) {
-      LLVMBuildStore(ctx->ac.builder, ctx->ac.i8_1,
-                     si_build_gep_i8(ctx, gs_vtxptr[vtx], lds_byte0_accept_flag));
+
+   if (gs_vtxptr) {
+      for (unsigned vtx = 0; vtx < num_vertices; vtx++) {
+         LLVMBuildStore(ctx->ac.builder, ctx->ac.i8_1,
+                        si_build_gep_i8(ctx, gs_vtxptr[vtx], lds_byte0_accept_flag));
+      }
    }
    ac_build_endif(&ctx->ac, 0);
 }
@@ -837,6 +840,71 @@ static bool add_clipdist_bits_for_clipvertex(struct si_shader_context *ctx,
    return added;
 }
 
+static void cull_primitive(struct si_shader_context *ctx,
+                           LLVMValueRef pos[3][4], LLVMValueRef clipdist_accepted,
+                           LLVMValueRef out_prim_accepted, LLVMValueRef gs_vtxptr_accept[3])
+{ /* Build the ac_cull_primitive() inputs from the shader key and viewport state, then call it. */
+   struct si_shader *shader = ctx->shader;
+   LLVMBuilderRef builder = ctx->ac.builder;
+
+   LLVMValueRef vp_scale[2] = {}, vp_translate[2] = {}, small_prim_precision = NULL;
+   LLVMValueRef clip_half_line_width[2] = {};
+
+   /* Load the viewport scale (elements 0-1) and translate (elements 2-3) for small prim culling. */
+   bool prim_is_lines = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES;
+   LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->small_prim_cull_info);
+   /* Lines will always use the non-AA viewport transformation, loaded from index 1 (else 0). */
+   LLVMValueRef vp = ac_build_load_to_sgpr(&ctx->ac, ptr,
+                                           prim_is_lines ? ctx->ac.i32_1 : ctx->ac.i32_0);
+   vp = LLVMBuildBitCast(builder, vp, ctx->ac.v4f32, "");
+   vp_scale[0] = ac_llvm_extract_elem(&ctx->ac, vp, 0);
+   vp_scale[1] = ac_llvm_extract_elem(&ctx->ac, vp, 1);
+   vp_translate[0] = ac_llvm_extract_elem(&ctx->ac, vp, 2);
+   vp_translate[1] = ac_llvm_extract_elem(&ctx->ac, vp, 3);
+
+   /* Set up the culling options; view XY and W culling are enabled for lines and triangles. */
+   struct ac_cull_options options = {};
+   options.cull_view_xy = true;
+   options.cull_w = true;
+
+   if (prim_is_lines) {
+      LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 2, 0));
+      terms = LLVMBuildBitCast(builder, terms, ctx->ac.v4f32, "");
+      clip_half_line_width[0] = ac_llvm_extract_elem(&ctx->ac, terms, 0);
+      clip_half_line_width[1] = ac_llvm_extract_elem(&ctx->ac, terms, 1);
+      small_prim_precision = ac_llvm_extract_elem(&ctx->ac, terms, 2);
+
+      options.num_vertices = 2;
+      options.cull_small_prims = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT;
+
+      assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
+      assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
+   } else {
+      /* Get the small prim filter precision: unpack from vs_state_bits, then form the f32 bits. */
+      small_prim_precision = si_unpack_param(ctx, ctx->vs_state_bits, 7, 4);
+      small_prim_precision =
+         LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
+      small_prim_precision =
+         LLVMBuildShl(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 23, 0), "");
+      small_prim_precision = LLVMBuildBitCast(builder, small_prim_precision, ctx->ac.f32, "");
+
+      options.num_vertices = 3;
+      options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
+      options.cull_back = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
+      options.cull_small_prims = true; /* this would only be false with conservative rasterization */
+      options.cull_zero_area = options.cull_front || options.cull_back;
+   }
+
+   /* Tell ES threads whether their vertex survived; params goes to ac_cull_primitive with the callback. */
+   LLVMValueRef params[] = {
+      out_prim_accepted,
+      (void*)gs_vtxptr_accept, /* array pointer stored in an LLVMValueRef slot */
+   };
+   ac_cull_primitive(&ctx->ac, pos, clipdist_accepted, vp_scale, vp_translate,
+                     small_prim_precision, clip_half_line_width,
+                     &options, gfx10_build_primitive_accepted, params);
+}
+
 /**
  * Cull primitives for NGG VS or TES, then compact vertices, which happens
  * before the VS or TES main function. Return values for the main function.
@@ -1058,62 +1126,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
          has_clipdist_mask ? LLVMBuildICmp(builder, LLVMIntEQ, clipdist_neg_mask, ctx->ac.i8_0, "")
                            : ctx->ac.i1true;
 
-      LLVMValueRef vp_scale[2] = {}, vp_translate[2] = {}, small_prim_precision = NULL;
-      LLVMValueRef clip_half_line_width[2] = {};
-
-      /* Load the viewport state for small prim culling. */
-      bool prim_is_lines = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES;
-      LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->small_prim_cull_info);
-      /* Lines will always use the non-AA viewport transformation. */
-      LLVMValueRef vp = ac_build_load_to_sgpr(&ctx->ac, ptr,
-                                              prim_is_lines ? ctx->ac.i32_1 : ctx->ac.i32_0);
-      vp = LLVMBuildBitCast(builder, vp, ctx->ac.v4f32, "");
-      vp_scale[0] = ac_llvm_extract_elem(&ctx->ac, vp, 0);
-      vp_scale[1] = ac_llvm_extract_elem(&ctx->ac, vp, 1);
-      vp_translate[0] = ac_llvm_extract_elem(&ctx->ac, vp, 2);
-      vp_translate[1] = ac_llvm_extract_elem(&ctx->ac, vp, 3);
-
-      /* Execute culling code. */
-      struct ac_cull_options options = {};
-      options.cull_view_xy = true;
-      options.cull_w = true;
-
-      if (prim_is_lines) {
-         LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 2, 0));
-         terms = LLVMBuildBitCast(builder, terms, ctx->ac.v4f32, "");
-         clip_half_line_width[0] = ac_llvm_extract_elem(&ctx->ac, terms, 0);
-         clip_half_line_width[1] = ac_llvm_extract_elem(&ctx->ac, terms, 1);
-         small_prim_precision = ac_llvm_extract_elem(&ctx->ac, terms, 2);
-
-         options.num_vertices = 2;
-         options.cull_small_prims = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT;
-
-         assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
-         assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
-      } else {
-         /* Get the small prim filter precision. */
-         small_prim_precision = si_unpack_param(ctx, ctx->vs_state_bits, 7, 4);
-         small_prim_precision =
-            LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
-         small_prim_precision =
-            LLVMBuildShl(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 23, 0), "");
-         small_prim_precision = LLVMBuildBitCast(builder, small_prim_precision, ctx->ac.f32, "");
-
-         options.num_vertices = 3;
-         options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
-         options.cull_back = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
-         options.cull_small_prims = true; /* this would only be false with conservative rasterization */
-         options.cull_zero_area = options.cull_front || options.cull_back;
-      }
-
-      /* Tell ES threads whether their vertex survived. */
-      LLVMValueRef params[] = {
-         gs_accepted,
-         (void*)gs_vtxptr,
-      };
-      ac_cull_primitive(&ctx->ac, pos, clipdist_accepted, vp_scale, vp_translate,
-                        small_prim_precision, clip_half_line_width,
-                        &options, gfx10_build_primitive_accepted, params);
+      cull_primitive(ctx, pos, clipdist_accepted, gs_accepted, gs_vtxptr);
    }
    ac_build_endif(&ctx->ac, 16002);
    ac_build_s_barrier(&ctx->ac);