[Mesa-dev] [PATCH 1/2] gallivm: simplify geometry shader mask handling a bit

sroland at vmware.com sroland at vmware.com
Mon Aug 12 08:34:42 PDT 2013


From: Roland Scheidegger <sroland at vmware.com>

Instead of reducing masks to 0/1, simply use the mask directly (0 or -1 per
lane) and subtract it where the 0/1 value used to be added.
Also use signed comparisons instead of unsigned ones (as far as I understand,
these values have to be (very) small, and a signed comparison means LLVM
doesn't have to apply additional logic to emulate the unsigned comparisons
the CPU can't do natively).
Saves roughly 15% of all instructions in a test geometry shader here.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   62 ++++++++++-------------
 1 file changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index affe059..d23a977 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2592,19 +2592,15 @@ sviewinfo_emit(
 }
 
 static LLVMValueRef
-mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
+mask_vec(struct lp_build_tgsi_context *bld_base)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
-   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
-   LLVMValueRef one_vec = bld_base->int_bld.one;
    struct lp_exec_mask *exec_mask = &bld->exec_mask;
 
    if (exec_mask->has_mask) {
-      one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
+      return exec_mask->exec_mask;
    }
-   one_vec = LLVMBuildAnd(builder, one_vec,
-                          lp_build_mask_value(bld->mask), "");
-   return one_vec;
+   return lp_build_mask_value(bld->mask);
 }
 
 static void
@@ -2613,11 +2609,10 @@ increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                           LLVMValueRef mask)
 {
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-
    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
-   
-   current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
-   
+
+   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
+
    LLVMBuildStore(builder, current_vec, ptr);
 }
 
@@ -2627,18 +2622,13 @@ clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                              LLVMValueRef mask)
 {
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-
    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
-   LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
-                                         PIPE_FUNC_NOTEQUAL,
-                                         mask,
-                                         bld_base->uint_bld.zero);
 
    current_vec = lp_build_select(&bld_base->uint_bld,
-                                 full_mask,
+                                 mask,
                                  bld_base->uint_bld.zero,
                                  current_vec);
-   
+
    LLVMBuildStore(builder, current_vec, ptr);
 }
 
@@ -2648,8 +2638,8 @@ clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                   LLVMValueRef total_emitted_vertices_vec)
 {
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
-   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
-   LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
+   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
+   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
                                         total_emitted_vertices_vec,
                                         bld->max_output_vertices_vec);
 
@@ -2666,23 +2656,23 @@ emit_vertex(
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
 
    if (bld->gs_iface->emit_vertex) {
-      LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
+      LLVMValueRef mask = mask_vec(bld_base);
       LLVMValueRef total_emitted_vertices_vec =
          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
-      masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
-                                                      total_emitted_vertices_vec);
+      mask = clamp_mask_to_max_output_vertices(bld, mask,
+                                               total_emitted_vertices_vec);
       gather_outputs(bld);
       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                  bld->outputs,
                                  total_emitted_vertices_vec);
       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
-                                masked_ones);
+                                mask);
       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
-                                masked_ones);
+                                mask);
 #if DUMP_GS_EMITS
       lp_build_print_value(bld->bld_base.base.gallivm,
                            " +++ emit vertex masked ones = ",
-                           masked_ones);
+                           mask);
       lp_build_print_value(bld->bld_base.base.gallivm,
                            " +++ emit vertex emitted = ",
                            total_emitted_vertices_vec);
@@ -2693,7 +2683,7 @@ emit_vertex(
 
 static void
 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
-                     LLVMValueRef masked_ones)
+                     LLVMValueRef mask)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
@@ -2703,7 +2693,7 @@ end_primitive_masked(struct lp_build_tgsi_context * bld_base,
          LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
       LLVMValueRef emitted_prims_vec =
          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
-      
+
       bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                    emitted_vertices_vec,
                                    emitted_prims_vec);
@@ -2711,7 +2701,7 @@ end_primitive_masked(struct lp_build_tgsi_context * bld_base,
 #if DUMP_GS_EMITS
       lp_build_print_value(bld->bld_base.base.gallivm,
                            " +++ end prim masked ones = ",
-                           masked_ones);
+                           mask);
       lp_build_print_value(bld->bld_base.base.gallivm,
                            " +++ end prim emitted verts1 = ",
                            emitted_vertices_vec);
@@ -2721,9 +2711,9 @@ end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                                          bld->emitted_prims_vec_ptr, ""));
 #endif
       increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
-                                masked_ones);
+                                mask);
       clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
-                                   masked_ones);
+                                   mask);
 #if DUMP_GS_EMITS
       lp_build_print_value(bld->bld_base.base.gallivm,
                            " +++ end prim emitted verts2 = ",
@@ -2744,7 +2734,7 @@ end_primitive(
 
    if (bld->gs_iface->end_primitive) {
       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-      LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
+      LLVMValueRef mask = mask_vec(bld_base);
       struct lp_build_context *uint_bld = &bld_base->uint_bld;
       LLVMValueRef emitted_verts = LLVMBuildLoad(
          builder, bld->emitted_vertices_vec_ptr, "");
@@ -2755,9 +2745,9 @@ end_primitive(
          telling us which, if any, execution slots actually have
          unemitted primitives, this way we make sure that end_primitives
          executes only on the paths that have unflushed vertices */
-      masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
-      
-      end_primitive_masked(bld_base, masked_ones);
+      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
+
+      end_primitive_masked(bld_base, mask);
    }
 }
 
@@ -3289,7 +3279,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
          }
       }
       bld.max_output_vertices_vec =
-         lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
+         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                 max_output_vertices);
    }
 
-- 
1.7.9.5


More information about the mesa-dev mailing list