Mesa (master): draw: simplify fetch some more

Roland Scheidegger sroland at kemper.freedesktop.org
Mon Nov 21 19:04:18 UTC 2016


Module: Mesa
Branch: master
Commit: 2471aaa02fc2be0f5fea2df7c7ae6af3aab16b26
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2471aaa02fc2be0f5fea2df7c7ae6af3aab16b26

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Sun Nov 13 16:33:20 2016 +0100

draw: simplify fetch some more

Don't keep the ofbit. This is just a minor simplification: adjust the
buffer size so that there will always be an overflow if the buffers aren't
valid to fetch from.
Also get rid of the control flow in the instanced path. Performance isn't a
concern there, but it's simpler and keeps the code more similar to the
ordinary fetch path.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
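
The core of the change in fetch_instanced() is replacing the if/else around
the AoS fetch with a branchless mask: the overflow condition is sign-extended
into an all-ones/all-zeros value that first zeroes the fetch offset (so an
out-of-range index reads from offset 0) and then zeroes the fetched value
itself. Below is a minimal standalone C sketch of that masking idea; it is
not the gallivm code, and fetch_rgba()/fetch_instanced_sketch() are
hypothetical stand-ins for lp_build_fetch_rgba_aos() and the real IR
generation:

   #include <stdint.h>
   #include <string.h>

   /* Hypothetical stand-in for lp_build_fetch_rgba_aos(): read one RGBA
    * element (here simply four floats) at the given byte offset. */
   static void fetch_rgba(float out[4], const uint8_t *map, uint32_t offset)
   {
      memcpy(out, map + offset, 4 * sizeof(float));
   }

   static void fetch_instanced_sketch(const uint8_t *map,
                                      uint32_t buffer_size_adj,
                                      uint32_t vb_stride, uint32_t index,
                                      float out[4])
   {
      uint64_t wide = (uint64_t)vb_stride * index;
      uint32_t stride = (uint32_t)wide;
      int overflowed = (wide >> 32) != 0 ||       /* mul overflow (the old ofbit) */
                       stride >= buffer_size_adj; /* note >=, matching LLVMIntUGE */

      /* All-zeros when overflowed, all-ones when valid (SExt of the
       * negated overflow bit in the real code). */
      uint32_t index_valid = overflowed ? 0u : ~0u;

      stride &= index_valid;        /* invalid fetches read from offset 0 */
      fetch_rgba(out, map, stride); /* fetch unconditionally, no control flow */

      /* Zero out the fetched value for invalid indices. */
      for (int i = 0; i < 4; i++) {
         uint32_t bits;
         memcpy(&bits, &out[i], sizeof(bits));
         bits &= index_valid;
         memcpy(&out[i], &bits, sizeof(bits));
      }
   }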

---

 src/gallium/auxiliary/draw/draw_llvm.c | 118 +++++++++++++++------------------
 1 file changed, 55 insertions(+), 63 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 2478b11..414f2dc 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -661,22 +661,23 @@ fetch_instanced(struct gallivm_state *gallivm,
                 LLVMValueRef vb_stride,
                 LLVMValueRef map_ptr,
                 LLVMValueRef buffer_size_adj,
-                LLVMValueRef ofbit,
                 LLVMValueRef *inputs,
                 LLVMValueRef index)
 {
-   LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
+   LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef aosf_t, aosi_t;
+   LLVMValueRef zero = LLVMConstNull(i32_t);
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef stride, buffer_overflowed, aos;
-   LLVMValueRef temp_ptr =
-      lp_build_alloca(gallivm,
-                      lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
-   struct lp_build_if_state if_ctx;
+   LLVMValueRef stride, buffer_overflowed, aos, index_valid;
+   LLVMValueRef ofbit = NULL;
    unsigned i;
 
+   aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
+   aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
+
    stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
 
-   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
+   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
                                      stride, buffer_size_adj,
                                      "buffer_overflowed");
    buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
@@ -686,28 +687,22 @@ fetch_instanced(struct gallivm_state *gallivm,
       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
    }
 
-   lp_build_if(&if_ctx, gallivm, buffer_overflowed);
-   {
-      LLVMValueRef val =
-         lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
-      LLVMBuildStore(builder, val, temp_ptr);
-   }
-   lp_build_else(&if_ctx);
-   {
-      LLVMValueRef val;
-
-      val = lp_build_fetch_rgba_aos(gallivm,
-                                    format_desc,
-                                    lp_float32_vec4_type(),
-                                    FALSE,
-                                    map_ptr,
-                                    stride, zero, zero,
-                                    NULL);
-      LLVMBuildStore(builder, val, temp_ptr);
-   }
-   lp_build_endif(&if_ctx);
+   index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
+   index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
+   stride = LLVMBuildAnd(builder, stride, index_valid, "");
 
-   aos = LLVMBuildLoad(builder, temp_ptr, "aos");
+   aos = lp_build_fetch_rgba_aos(gallivm,
+                                 format_desc,
+                                 lp_float32_vec4_type(),
+                                 FALSE,
+                                 map_ptr,
+                                 stride, zero, zero,
+                                 NULL);
+
+   index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
+   aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
+   aos = LLVMBuildAnd(builder, aos, index_valid, "");
+   aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
 
    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
       LLVMValueRef index = lp_build_const_int32(gallivm, i);
@@ -758,7 +753,6 @@ fetch_vector(struct gallivm_state *gallivm,
              LLVMValueRef vb_stride,
              LLVMValueRef map_ptr,
              LLVMValueRef buffer_size_adj,
-             LLVMValueRef ofmask,
              LLVMValueRef *inputs,
              LLVMValueRef indices)
 {
@@ -786,12 +780,11 @@ fetch_vector(struct gallivm_state *gallivm,
     */
    offset = lp_build_mul_32_lohi_cpu(&blduivec, vb_stride, indices, &tmp);
 
-   tmp = lp_build_compare(gallivm, blduivec.type,
-                          PIPE_FUNC_EQUAL, tmp, blduivec.zero);
-   valid_mask = lp_build_andnot(&blduivec, tmp, ofmask);
+   valid_mask = lp_build_compare(gallivm, blduivec.type,
+                                 PIPE_FUNC_EQUAL, tmp, blduivec.zero);
 
    tmp = lp_build_compare(gallivm, blduivec.type,
-                          PIPE_FUNC_LEQUAL, offset, buffer_size_adj);
+                          PIPE_FUNC_LESS, offset, buffer_size_adj);
    valid_mask = LLVMBuildAnd(builder, tmp, valid_mask, "");
 
    /* not valid elements use offset 0 */
@@ -1581,7 +1574,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
-   LLVMValueRef ofmask[PIPE_MAX_ATTRIBS];
    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
    LLVMValueRef fake_buf_ptr, fake_buf;
 
@@ -1762,9 +1754,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
          buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
 
          ofbit = NULL;
+         /*
+          * We'll set buffer_size_adj to zero if we have of, so it will
+          * always overflow later automatically without having to keep ofbit.
+          */
          buf_offset = lp_build_uadd_overflow(gallivm, vb_buffer_offset,
                                              src_offset, &ofbit);
-         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, bsize,
+         tmp = lp_build_sub(&bld, bsize, bld.one);
+         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
                                                      &ofbit);
          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
                                                      buf_offset, &ofbit);
@@ -1776,13 +1773,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
           * inside the main loop.
           * (Alternatively, could have control flow per vector skipping fetch
           * if ofbit is true.)
-          * For instanced elements, we keep the control flow for now as it's a
-          * scalar fetch, making things easier.
           */
          if (velem->instance_divisor) {
-            /* Index is equal to the start instance plus the number of current
+            /*
+             * Index is equal to the start instance plus the number of current
              * instance divided by the divisor. In this case we compute it as:
-             * index = start_instance + (instance_id  / divisor)
+             * index = start_instance + (instance_id  / divisor).
+             * Note we could actually do the fetch here, outside the loop -
+             * it's all constant, hopefully llvm recognizes this.
              */
             LLVMValueRef current_instance;
             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
@@ -1791,31 +1789,25 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
                                              "instance_divisor");
             instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance,
                                                        current_instance, &ofbit);
-            map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
-
-            /* This is a scalar fetch, just keep the of bit */
-            ofmask[j] = ofbit;
          }
-         else {
-            temp_ptr = lp_build_alloca_undef(gallivm,
-                          LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
 
-            lp_build_if(&if_ctx, gallivm, ofbit);
-            {
-               LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
-            }
-            lp_build_else(&if_ctx);
-            {
-               map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
-               LLVMBuildStore(builder, map_ptr[j], temp_ptr);
-            }
-            lp_build_endif(&if_ctx);
-            map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
+         buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
+                                              buffer_size_adj[j], "");
+
+         temp_ptr = lp_build_alloca_undef(gallivm,
+                       LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
 
-            /* Expand to vector mask */
-            ofmask[j] = LLVMBuildSExt(builder, ofbit, int32_type, "");
-            ofmask[j] = lp_build_broadcast_scalar(&blduivec, ofmask[j]);
+         lp_build_if(&if_ctx, gallivm, ofbit);
+         {
+            LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
+         }
+         lp_build_else(&if_ctx);
+         {
+            map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
+            LLVMBuildStore(builder, map_ptr[j], temp_ptr);
          }
+         lp_build_endif(&if_ctx);
+         map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
 
          if (0) {
             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
@@ -1917,13 +1909,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
          else if (velem->instance_divisor) {
             fetch_instanced(gallivm, format_desc, vs_type,
                             vb_stride[j], map_ptr[j],
-                            buffer_size_adj[j], ofmask[j],
+                            buffer_size_adj[j],
                             inputs[j], instance_index[j]);
          }
          else {
             fetch_vector(gallivm, format_desc, vs_type,
                          vb_stride[j], map_ptr[j],
-                         buffer_size_adj[j], ofmask[j],
+                         buffer_size_adj[j],
                          inputs[j], true_index_array);
          }
       }
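
The draw_llvm_generate() hunks show why the per-attribute ofbit/ofmask arrays
can be dropped entirely: buffer_size_adj[j] is now computed as
buffer_size - (bsize - 1) - buf_offset and forced to zero whenever any of
those computations overflow, so the per-fetch comparison (LLVMIntUGE in
fetch_instanced(), PIPE_FUNC_LESS in fetch_vector()) rejects every index on
its own; the remaining if (ofbit) merely redirects map_ptr to the fake buffer
so the masked fetch at offset 0 still reads valid memory. A hedged scalar
sketch of that setup (compute_buffer_size_adj() is a made-up helper; the real
code emits IR via lp_build_uadd_overflow()/lp_build_usub_overflow()):

   #include <stdbool.h>
   #include <stdint.h>

   /* Hypothetical scalar model of the buffer_size_adj computation.
    * bsize is the number of bytes one fetch of this vertex format reads. */
   static uint32_t compute_buffer_size_adj(uint32_t buffer_size,
                                           uint32_t vb_buffer_offset,
                                           uint32_t src_offset,
                                           uint32_t bsize)
   {
      bool of = false;
      uint32_t buf_offset, adj;

      /* buf_offset = vb_buffer_offset + src_offset, tracking overflow */
      buf_offset = vb_buffer_offset + src_offset;
      of |= buf_offset < vb_buffer_offset;

      /* adj = buffer_size - (bsize - 1) - buf_offset, tracking underflow */
      adj = buffer_size - (bsize - 1);
      of |= adj > buffer_size;
      of |= adj < buf_offset;
      adj -= buf_offset;

      /* Force the adjusted size to zero on any overflow, so every later
       * "offset < adj" / "stride >= adj" test fails without needing ofbit. */
      return of ? 0 : adj;
   }

With buffer_size_adj prepared this way, fetch_vector() builds valid_mask
purely from the high bits of the stride multiply and the
offset < buffer_size_adj compare, with no separate ofmask input.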



