[Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

Fri Jul 7 01:31:43 UTC 2017

On Thu, Jul 6, 2017 at 2:18 PM, Connor Abbott <cwabbott0 at gmail.com> wrote:
> On Thu, Jul 6, 2017 at 2:01 PM, Bas Nieuwenhuizen
> <bas at basnieuwenhuizen.nl> wrote:
>> On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott <connora at valvesoftware.com> wrote:
>>> From: Connor Abbott <cwabbott0 at gmail.com>
>>>
>>> The old way was very TGSI-based, and couldn't handle indirect
>>> dereferences at all. Instead, pass through the type information NIR has
>>
>> I think the old code should handle indirect derefs just fine? See the
>> indir_index stuff. I'm kind of worried that LLVM doesn't promote
>> variables with indirect derefs agrresively enough to registers. IIRC
>> it only converts scalar arrays of up to 4 elements to vectors, while
>> radv always promotes and IIRC radeonsi promotes for all variables with
>> something like <= 20 dwords?
>
> Ah, true, I missed that while deleting it :). At the same time, if
> LLVM isn't putting stuff into registers aggressively enough, that's
> something that should be fixed in LLVM. Gathering everything into a
> vector every time everything something is used, like what this code
> does, is going to make it harder for LLVM to know what stuff aliases
> and optimize accordingly since LLVM won't be able to use its normal
> alias analysis. I don't know if LLVM will be able to optimize that
> pattern as well.

After looking into it some more, I think LLVM won't promote allocas to
registers at all when there are non-constant indices in the mix, and
fixing it seems kinda involved. I guess a better solution for now
would be to use nir_lower_locals_to_regs, since then each
register/array can get its own vector. I'll respin the series to
handle just shared variables, since that will fix the Feral issues,
but I don't want to leave too much other churn for Nicolai.

>
>>
>>
>>> about local variables to LLVM, and translate NIR dereferences directly
>>> into the equivalent GEP instructions in LLVM.
>>> ---
>>>  src/amd/common/ac_nir_to_llvm.c | 204 ++++++++++++++++++++++++++++------------
>>>  1 file changed, 146 insertions(+), 58 deletions(-)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>>> index e72747a..f42d214 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>>>
>>>         struct hash_table *defs;
>>>         struct hash_table *phis;
>>> +       struct hash_table *vars;
>>>
>>>         LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
>>>         LLVMValueRef ring_offsets;
>>> @@ -157,8 +158,6 @@ struct nir_to_llvm_context {
>>>         LLVMValueRef shared_memory;
>>>         uint64_t input_mask;
>>>         uint64_t output_mask;
>>> -       int num_locals;
>>> -       LLVMValueRef *locals;
>>>         uint8_t num_output_clips;
>>>         uint8_t num_output_culls;
>>>
>>> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>>>         return result;
>>>  }
>>>
>>> +static LLVMValueRef
>>> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
>>> +                   nir_deref_var *deref)
>>> +{
>>> +       struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
>>> +       assert(entry->data);
>>> +       LLVMValueRef val = entry->data;
>>> +       nir_deref *tail = deref->deref.child;
>>> +       while (tail != NULL) {
>>> +               LLVMValueRef offset;
>>> +               switch (tail->deref_type) {
>>> +               case nir_deref_type_array: {
>>> +                       nir_deref_array *array = nir_deref_as_array(tail);
>>> +                       offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
>>> +                       if (array->deref_array_type ==
>>> +                           nir_deref_array_type_indirect) {
>>> +                               offset = LLVMBuildAdd(ctx->builder, offset,
>>> +                                                     get_src(ctx,
>>> +                                                             array->indirect),
>>> +                                                     "");
>>> +                       }
>>> +                       break;
>>> +               }
>>> +               case nir_deref_type_struct: {
>>> +                       nir_deref_struct *deref_struct =
>>> +                               nir_deref_as_struct(tail);
>>> +                       offset = LLVMConstInt(ctx->i32,
>>> +                                             deref_struct->index, 0);
>>> +                       break;
>>> +               }
>>> +               default:
>>> +                       unreachable("bad deref type");
>>> +               }
>>> +               val = ac_build_gep0(&ctx->ac, val, offset);
>>> +               tail = tail->child;
>>> +       }
>>> +       return val;
>>> +}
>>> +
>>>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>>                                    nir_intrinsic_instr *instr)
>>>  {
>>> @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>>                                 values[chan] = ctx->inputs[idx + chan + const_index * 4];
>>>                 }
>>>                 break;
>>> -       case nir_var_local:
>>> -               for (unsigned chan = 0; chan < ve; chan++) {
>>> -                       if (indir_index) {
>>> -                               unsigned count = glsl_count_attribute_slots(
>>> -                                       instr->variables[0]->var->type, false);
>>> -                               count -= chan / 4;
>>> -                               LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>>> -                                               &ctx->ac, ctx->locals + idx + chan, count,
>>> -                                               4, true);
>>> -
>>> -                               values[chan] = LLVMBuildExtractElement(ctx->builder,
>>> -                                                                      tmp_vec,
>>> -                                                                      indir_index, "");
>>> -                       } else {
>>> -                               values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
>>> -                       }
>>> -               }
>>> -               break;
>>> +       case nir_var_local: {
>>> +               LLVMValueRef address = build_gep_for_deref(ctx,
>>> +                                                          instr->variables[0]);
>>> +               LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
>>> +               return LLVMBuildBitCast(ctx->builder, val,
>>> +                                       get_def_type(ctx, &instr->dest.ssa),
>>> +                                       "");
>>> +       }
>>>         case nir_var_shader_out:
>>>                 if (ctx->stage == MESA_SHADER_TESS_CTRL)
>>>                         return load_tcs_output(ctx, instr);
>>> @@ -3079,31 +3107,36 @@ visit_store_var(struct nir_to_llvm_context *ctx,
>>>                         }
>>>                 }
>>>                 break;
>>> -       case nir_var_local:
>>> -               for (unsigned chan = 0; chan < 8; chan++) {
>>> -                       if (!(writemask & (1 << chan)))
>>> -                               continue;
>>> -
>>> -                       value = llvm_extract_elem(ctx, src, chan);
>>> -                       if (indir_index) {
>>> -                               unsigned count = glsl_count_attribute_slots(
>>> -                                       instr->variables[0]->var->type, false);
>>> -                               count -= chan / 4;
>>> -                               LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>>> -                                       &ctx->ac, ctx->locals + idx + chan, count,
>>> -                                       4, true);
>>> -
>>> -                               tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
>>> -                                                                value, indir_index, "");
>>> -                               build_store_values_extended(ctx, ctx->locals + idx + chan,
>>> -                                                           count, 4, tmp_vec);
>>> -                       } else {
>>> -                               temp_ptr = ctx->locals[idx + chan + const_index * 4];
>>> -
>>> -                               LLVMBuildStore(ctx->builder, value, temp_ptr);
>>> +       case nir_var_local: {
>>> +               int writemask = instr->const_index[0];
>>> +               LLVMValueRef address = build_gep_for_deref(ctx,
>>> +                                                          instr->variables[0]);
>>> +               LLVMValueRef val = get_src(ctx, instr->src[0]);
>>> +               unsigned components =
>>> +                       glsl_get_vector_elements(
>>> +                          nir_deref_tail(&instr->variables[0]->deref)->type);
>>> +               if (writemask == (1 << components) - 1) {
>>> +                       val = LLVMBuildBitCast(
>>> +                          ctx->builder, val,
>>> +                          LLVMGetElementType(LLVMTypeOf(address)), "");
>>> +                       LLVMBuildStore(ctx->builder, val, address);
>>> +               } else {
>>> +                       for (unsigned chan = 0; chan < 4; chan++) {
>>> +                               if (!(writemask & (1 << chan)))
>>> +                                       continue;
>>> +                               LLVMValueRef ptr =
>>> +                                       LLVMBuildStructGEP(ctx->builder,
>>> +                                                          address, chan, "");
>>> +                               LLVMValueRef src = llvm_extract_elem(ctx, val,
>>> +                                                                    chan);
>>> +                               src = LLVMBuildBitCast(
>>> +                                  ctx->builder, src,
>>> +                                  LLVMGetElementType(LLVMTypeOf(ptr)), "");
>>> +                               LLVMBuildStore(ctx->builder, src, ptr);
>>>                         }
>>>                 }
>>>                 break;
>>> +       }
>>>         case nir_var_shared: {
>>>                 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
>>>
>>> @@ -5005,26 +5038,79 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
>>>         ctx->output_mask |= mask_attribs;
>>>  }
>>>
>>> +static LLVMTypeRef
>>> +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
>>> +                      enum glsl_base_type type)
>>> +{
>>> +       switch (type) {
>>> +       case GLSL_TYPE_INT:
>>> +       case GLSL_TYPE_UINT:
>>> +       case GLSL_TYPE_BOOL:
>>> +       case GLSL_TYPE_SUBROUTINE:
>>> +               return ctx->i32;
>>> +       case GLSL_TYPE_FLOAT: /* TODO handle mediump */
>>> +               return ctx->f32;
>>> +       case GLSL_TYPE_INT64:
>>> +       case GLSL_TYPE_UINT64:
>>> +               return ctx->i64;
>>> +       case GLSL_TYPE_DOUBLE:
>>> +               return ctx->f64;
>>> +       default:
>>> +               unreachable("unknown GLSL type");
>>> +       }
>>> +}
>>> +
>>> +static LLVMTypeRef
>>> +glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
>>> +                 const struct glsl_type *type)
>>> +{
>>> +       if (glsl_type_is_scalar(type)) {
>>> +               return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
>>> +       }
>>> +
>>> +       if (glsl_type_is_vector(type)) {
>>> +               return LLVMVectorType(
>>> +                  glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
>>> +                  glsl_get_vector_elements(type));
>>> +       }
>>> +
>>> +       if (glsl_type_is_matrix(type)) {
>>> +               return LLVMArrayType(
>>> +                  glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
>>> +                  glsl_get_matrix_columns(type));
>>> +       }
>>> +
>>> +       if (glsl_type_is_array(type)) {
>>> +               return LLVMArrayType(
>>> +                  glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
>>> +                  glsl_get_length(type));
>>> +       }
>>> +
>>> +       assert(glsl_type_is_struct(type));
>>> +
>>> +       LLVMTypeRef member_types[glsl_get_length(type)];
>>> +
>>> +       for (unsigned i = 0; i < glsl_get_length(type); i++) {
>>> +               member_types[i] =
>>> +                       glsl_to_llvm_type(ctx,
>>> +                                         glsl_get_struct_field(type, i));
>>> +       }
>>> +
>>> +       return LLVMStructTypeInContext(ctx->context, member_types,
>>> +                                      glsl_get_length(type), false);
>>> +}
>>> +
>>>  static void
>>>  setup_locals(struct nir_to_llvm_context *ctx,
>>>              struct nir_function *func)
>>>  {
>>> -       int i, j;
>>> -       ctx->num_locals = 0;
>>>         nir_foreach_variable(variable, &func->impl->locals) {
>>> -               unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
>>> -               variable->data.driver_location = ctx->num_locals * 4;
>>> -               ctx->num_locals += attrib_count;
>>> -       }
>>> -       ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
>>> -       if (!ctx->locals)
>>> -           return;
>>> -
>>> -       for (i = 0; i < ctx->num_locals; i++) {
>>> -               for (j = 0; j < 4; j++) {
>>> -                       ctx->locals[i * 4 + j] =
>>> -                               si_build_alloca_undef(ctx, ctx->f32, "temp");
>>> -               }
>>> +               LLVMValueRef local =
>>> +                       si_build_alloca_undef(ctx,
>>> +                                             glsl_to_llvm_type(ctx,
>>> +                                                               variable->type),
>>> +                                             variable->name ? variable->name : "");
>>> +               _mesa_hash_table_insert(ctx->vars, variable, local);
>>>         }
>>>  }
>>>
>>> @@ -6034,6 +6120,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>>                                            _mesa_key_pointer_equal);
>>>         ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>>>                                            _mesa_key_pointer_equal);
>>> +       ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>>> +                                            _mesa_key_pointer_equal);
>>>
>>>         func = (struct nir_function *)exec_list_get_head(&nir->functions);
>>>
>>> @@ -6048,9 +6136,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>>         ac_llvm_finalize_module(&ctx);
>>>
>>>         ac_nir_eliminate_const_vs_outputs(&ctx);
>>> -       free(ctx.locals);
>>>         ralloc_free(ctx.defs);
>>>         ralloc_free(ctx.phis);
>>> +       ralloc_free(ctx.vars);
>>>
>>>         if (nir->stage == MESA_SHADER_GEOMETRY) {
>>>                 unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
>>> --
>>> 2.9.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev