[Mesa-dev] [PATCH 4/5] draw: simplify fetch some more

Jose Fonseca jfonseca at vmware.com
Thu Nov 17 17:09:12 UTC 2016


On 13/11/16 16:48, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> Don't keep the ofbit. This is just a minor simplification, just adjust
> the buffer size so that there will always be an overflow if buffers aren't
> valid to fetch from.
> Also, get rid of control flow from the instanced path too. Not worried about
> performance, but it's simpler and keeps the code more similar to ordinary
> fetch.
> ---
>  src/gallium/auxiliary/draw/draw_llvm.c | 118 +++++++++++++++------------------
>  1 file changed, 55 insertions(+), 63 deletions(-)
>
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
> index 2478b11..414f2dc 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -661,22 +661,23 @@ fetch_instanced(struct gallivm_state *gallivm,
>                  LLVMValueRef vb_stride,
>                  LLVMValueRef map_ptr,
>                  LLVMValueRef buffer_size_adj,
> -                LLVMValueRef ofbit,
>                  LLVMValueRef *inputs,
>                  LLVMValueRef index)
>  {
> -   LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
> +   LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
> +   LLVMTypeRef aosf_t, aosi_t;
> +   LLVMValueRef zero = LLVMConstNull(i32_t);
>     LLVMBuilderRef builder = gallivm->builder;
> -   LLVMValueRef stride, buffer_overflowed, aos;
> -   LLVMValueRef temp_ptr =
> -      lp_build_alloca(gallivm,
> -                      lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
> -   struct lp_build_if_state if_ctx;
> +   LLVMValueRef stride, buffer_overflowed, aos, index_valid;
> +   LLVMValueRef ofbit = NULL;
>     unsigned i;
>
> +   aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
> +   aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
> +
>     stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
>
> -   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
> +   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
>                                       stride, buffer_size_adj,
>                                       "buffer_overflowed");
>     buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
> @@ -686,28 +687,22 @@ fetch_instanced(struct gallivm_state *gallivm,
>        lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
>     }
>
> -   lp_build_if(&if_ctx, gallivm, buffer_overflowed);
> -   {
> -      LLVMValueRef val =
> -         lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
> -      LLVMBuildStore(builder, val, temp_ptr);
> -   }
> -   lp_build_else(&if_ctx);
> -   {
> -      LLVMValueRef val;
> -
> -      val = lp_build_fetch_rgba_aos(gallivm,
> -                                    format_desc,
> -                                    lp_float32_vec4_type(),
> -                                    FALSE,
> -                                    map_ptr,
> -                                    stride, zero, zero,
> -                                    NULL);
> -      LLVMBuildStore(builder, val, temp_ptr);
> -   }
> -   lp_build_endif(&if_ctx);
> +   index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
> +   index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
> +   stride = LLVMBuildAnd(builder, stride, index_valid, "");
>
> -   aos = LLVMBuildLoad(builder, temp_ptr, "aos");
> +   aos = lp_build_fetch_rgba_aos(gallivm,
> +                                 format_desc,
> +                                 lp_float32_vec4_type(),
> +                                 FALSE,
> +                                 map_ptr,
> +                                 stride, zero, zero,
> +                                 NULL);
> +

Comment here would be nice.

Otherwise series looks good.  A nice cleanup.


Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

> +   index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
> +   aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
> +   aos = LLVMBuildAnd(builder, aos, index_valid, "");
> +   aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
>
>     for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
>        LLVMValueRef index = lp_build_const_int32(gallivm, i);
> @@ -758,7 +753,6 @@ fetch_vector(struct gallivm_state *gallivm,
>               LLVMValueRef vb_stride,
>               LLVMValueRef map_ptr,
>               LLVMValueRef buffer_size_adj,
> -             LLVMValueRef ofmask,
>               LLVMValueRef *inputs,
>               LLVMValueRef indices)
>  {
> @@ -786,12 +780,11 @@ fetch_vector(struct gallivm_state *gallivm,
>      */
>     offset = lp_build_mul_32_lohi_cpu(&blduivec, vb_stride, indices, &tmp);
>
> -   tmp = lp_build_compare(gallivm, blduivec.type,
> -                          PIPE_FUNC_EQUAL, tmp, blduivec.zero);
> -   valid_mask = lp_build_andnot(&blduivec, tmp, ofmask);
> +   valid_mask = lp_build_compare(gallivm, blduivec.type,
> +                                 PIPE_FUNC_EQUAL, tmp, blduivec.zero);
>
>     tmp = lp_build_compare(gallivm, blduivec.type,
> -                          PIPE_FUNC_LEQUAL, offset, buffer_size_adj);
> +                          PIPE_FUNC_LESS, offset, buffer_size_adj);
>     valid_mask = LLVMBuildAnd(builder, tmp, valid_mask, "");
>
>     /* not valid elements use offset 0 */
> @@ -1581,7 +1574,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>     LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
>     LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
>     LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
> -   LLVMValueRef ofmask[PIPE_MAX_ATTRIBS];
>     LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
>     LLVMValueRef fake_buf_ptr, fake_buf;
>
> @@ -1762,9 +1754,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>           buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
>
>           ofbit = NULL;
> +         /*
> +          * We'll set buffer_size_adj to zero if we have of, so it will
> +          * always overflow later automatically without having to keep ofbit.
> +          */
>           buf_offset = lp_build_uadd_overflow(gallivm, vb_buffer_offset,
>                                               src_offset, &ofbit);
> -         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, bsize,
> +         tmp = lp_build_sub(&bld, bsize, bld.one);
> +         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
>                                                       &ofbit);
>           buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
>                                                       buf_offset, &ofbit);
> @@ -1776,13 +1773,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>            * inside the main loop.
>            * (Alternatively, could have control flow per vector skipping fetch
>            * if ofbit is true.)
> -          * For instanced elements, we keep the control flow for now as it's a
> -          * scalar fetch, making things easier.
>            */
>           if (velem->instance_divisor) {
> -            /* Index is equal to the start instance plus the number of current
> +            /*
> +             * Index is equal to the start instance plus the number of current
>               * instance divided by the divisor. In this case we compute it as:
> -             * index = start_instance + (instance_id  / divisor)
> +             * index = start_instance + (instance_id  / divisor).
> +             * Note we could actually do the fetch here, outside the loop -
> +             * it's all constant, hopefully llvm recognizes this.
>               */
>              LLVMValueRef current_instance;
>              current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
> @@ -1791,31 +1789,25 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>                                               "instance_divisor");
>              instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance,
>                                                         current_instance, &ofbit);
> -            map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
> -
> -            /* This is a scalar fetch, just keep the of bit */
> -            ofmask[j] = ofbit;
>           }
> -         else {
> -            temp_ptr = lp_build_alloca_undef(gallivm,
> -                          LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
>
> -            lp_build_if(&if_ctx, gallivm, ofbit);
> -            {
> -               LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
> -            }
> -            lp_build_else(&if_ctx);
> -            {
> -               map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
> -               LLVMBuildStore(builder, map_ptr[j], temp_ptr);
> -            }
> -            lp_build_endif(&if_ctx);
> -            map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
> +         buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
> +                                              buffer_size_adj[j], "");
> +
> +         temp_ptr = lp_build_alloca_undef(gallivm,
> +                       LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
>
> -            /* Expand to vector mask */
> -            ofmask[j] = LLVMBuildSExt(builder, ofbit, int32_type, "");
> -            ofmask[j] = lp_build_broadcast_scalar(&blduivec, ofmask[j]);
> +         lp_build_if(&if_ctx, gallivm, ofbit);
> +         {
> +            LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
> +         }
> +         lp_build_else(&if_ctx);
> +         {
> +            map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
> +            LLVMBuildStore(builder, map_ptr[j], temp_ptr);
>           }
> +         lp_build_endif(&if_ctx);
> +         map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
>
>           if (0) {
>              lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
> @@ -1917,13 +1909,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>           else if (velem->instance_divisor) {
>              fetch_instanced(gallivm, format_desc, vs_type,
>                              vb_stride[j], map_ptr[j],
> -                            buffer_size_adj[j], ofmask[j],
> +                            buffer_size_adj[j],
>                              inputs[j], instance_index[j]);
>           }
>           else {
>              fetch_vector(gallivm, format_desc, vs_type,
>                           vb_stride[j], map_ptr[j],
> -                         buffer_size_adj[j], ofmask[j],
> +                         buffer_size_adj[j],
>                           inputs[j], true_index_array);
>           }
>        }
>



More information about the mesa-dev mailing list