[Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

Connor Abbott cwabbott0 at gmail.com
Thu Jul 6 21:18:36 UTC 2017


On Thu, Jul 6, 2017 at 2:01 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott <connora at valvesoftware.com> wrote:
>> From: Connor Abbott <cwabbott0 at gmail.com>
>>
>> The old way was very TGSI-based, and couldn't handle indirect
>> dereferences at all. Instead, pass through the type information NIR has
>
> I think the old code should handle indirect derefs just fine? See the
> indir_index stuff. I'm kind of worried that LLVM doesn't promote
> variables with indirect derefs agrresively enough to registers. IIRC
> it only converts scalar arrays of up to 4 elements to vectors, while
> radv always promotes and IIRC radeonsi promotes for all variables with
> something like <= 20 dwords?

Ah, true, I missed that while deleting it :). At the same time, if
LLVM isn't putting stuff into registers aggressively enough, that's
something that should be fixed in LLVM. Gathering everything into a
vector every time everything something is used, like what this code
does, is going to make it harder for LLVM to know what stuff aliases
and optimize accordingly since LLVM won't be able to use its normal
alias analysis. I don't know if LLVM will be able to optimize that
pattern as well.

>
>
>> about local variables to LLVM, and translate NIR dereferences directly
>> into the equivalent GEP instructions in LLVM.
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 204 ++++++++++++++++++++++++++++------------
>>  1 file changed, 146 insertions(+), 58 deletions(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>> index e72747a..f42d214 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>>
>>         struct hash_table *defs;
>>         struct hash_table *phis;
>> +       struct hash_table *vars;
>>
>>         LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
>>         LLVMValueRef ring_offsets;
>> @@ -157,8 +158,6 @@ struct nir_to_llvm_context {
>>         LLVMValueRef shared_memory;
>>         uint64_t input_mask;
>>         uint64_t output_mask;
>> -       int num_locals;
>> -       LLVMValueRef *locals;
>>         uint8_t num_output_clips;
>>         uint8_t num_output_culls;
>>
>> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>>         return result;
>>  }
>>
>> +static LLVMValueRef
>> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
>> +                   nir_deref_var *deref)
>> +{
>> +       struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
>> +       assert(entry->data);
>> +       LLVMValueRef val = entry->data;
>> +       nir_deref *tail = deref->deref.child;
>> +       while (tail != NULL) {
>> +               LLVMValueRef offset;
>> +               switch (tail->deref_type) {
>> +               case nir_deref_type_array: {
>> +                       nir_deref_array *array = nir_deref_as_array(tail);
>> +                       offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
>> +                       if (array->deref_array_type ==
>> +                           nir_deref_array_type_indirect) {
>> +                               offset = LLVMBuildAdd(ctx->builder, offset,
>> +                                                     get_src(ctx,
>> +                                                             array->indirect),
>> +                                                     "");
>> +                       }
>> +                       break;
>> +               }
>> +               case nir_deref_type_struct: {
>> +                       nir_deref_struct *deref_struct =
>> +                               nir_deref_as_struct(tail);
>> +                       offset = LLVMConstInt(ctx->i32,
>> +                                             deref_struct->index, 0);
>> +                       break;
>> +               }
>> +               default:
>> +                       unreachable("bad deref type");
>> +               }
>> +               val = ac_build_gep0(&ctx->ac, val, offset);
>> +               tail = tail->child;
>> +       }
>> +       return val;
>> +}
>> +
>>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>                                    nir_intrinsic_instr *instr)
>>  {
>> @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>                                 values[chan] = ctx->inputs[idx + chan + const_index * 4];
>>                 }
>>                 break;
>> -       case nir_var_local:
>> -               for (unsigned chan = 0; chan < ve; chan++) {
>> -                       if (indir_index) {
>> -                               unsigned count = glsl_count_attribute_slots(
>> -                                       instr->variables[0]->var->type, false);
>> -                               count -= chan / 4;
>> -                               LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>> -                                               &ctx->ac, ctx->locals + idx + chan, count,
>> -                                               4, true);
>> -
>> -                               values[chan] = LLVMBuildExtractElement(ctx->builder,
>> -                                                                      tmp_vec,
>> -                                                                      indir_index, "");
>> -                       } else {
>> -                               values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
>> -                       }
>> -               }
>> -               break;
>> +       case nir_var_local: {
>> +               LLVMValueRef address = build_gep_for_deref(ctx,
>> +                                                          instr->variables[0]);
>> +               LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
>> +               return LLVMBuildBitCast(ctx->builder, val,
>> +                                       get_def_type(ctx, &instr->dest.ssa),
>> +                                       "");
>> +       }
>>         case nir_var_shader_out:
>>                 if (ctx->stage == MESA_SHADER_TESS_CTRL)
>>                         return load_tcs_output(ctx, instr);
>> @@ -3079,31 +3107,36 @@ visit_store_var(struct nir_to_llvm_context *ctx,
>>                         }
>>                 }
>>                 break;
>> -       case nir_var_local:
>> -               for (unsigned chan = 0; chan < 8; chan++) {
>> -                       if (!(writemask & (1 << chan)))
>> -                               continue;
>> -
>> -                       value = llvm_extract_elem(ctx, src, chan);
>> -                       if (indir_index) {
>> -                               unsigned count = glsl_count_attribute_slots(
>> -                                       instr->variables[0]->var->type, false);
>> -                               count -= chan / 4;
>> -                               LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>> -                                       &ctx->ac, ctx->locals + idx + chan, count,
>> -                                       4, true);
>> -
>> -                               tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
>> -                                                                value, indir_index, "");
>> -                               build_store_values_extended(ctx, ctx->locals + idx + chan,
>> -                                                           count, 4, tmp_vec);
>> -                       } else {
>> -                               temp_ptr = ctx->locals[idx + chan + const_index * 4];
>> -
>> -                               LLVMBuildStore(ctx->builder, value, temp_ptr);
>> +       case nir_var_local: {
>> +               int writemask = instr->const_index[0];
>> +               LLVMValueRef address = build_gep_for_deref(ctx,
>> +                                                          instr->variables[0]);
>> +               LLVMValueRef val = get_src(ctx, instr->src[0]);
>> +               unsigned components =
>> +                       glsl_get_vector_elements(
>> +                          nir_deref_tail(&instr->variables[0]->deref)->type);
>> +               if (writemask == (1 << components) - 1) {
>> +                       val = LLVMBuildBitCast(
>> +                          ctx->builder, val,
>> +                          LLVMGetElementType(LLVMTypeOf(address)), "");
>> +                       LLVMBuildStore(ctx->builder, val, address);
>> +               } else {
>> +                       for (unsigned chan = 0; chan < 4; chan++) {
>> +                               if (!(writemask & (1 << chan)))
>> +                                       continue;
>> +                               LLVMValueRef ptr =
>> +                                       LLVMBuildStructGEP(ctx->builder,
>> +                                                          address, chan, "");
>> +                               LLVMValueRef src = llvm_extract_elem(ctx, val,
>> +                                                                    chan);
>> +                               src = LLVMBuildBitCast(
>> +                                  ctx->builder, src,
>> +                                  LLVMGetElementType(LLVMTypeOf(ptr)), "");
>> +                               LLVMBuildStore(ctx->builder, src, ptr);
>>                         }
>>                 }
>>                 break;
>> +       }
>>         case nir_var_shared: {
>>                 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
>>
>> @@ -5005,26 +5038,79 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
>>         ctx->output_mask |= mask_attribs;
>>  }
>>
>> +static LLVMTypeRef
>> +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
>> +                      enum glsl_base_type type)
>> +{
>> +       switch (type) {
>> +       case GLSL_TYPE_INT:
>> +       case GLSL_TYPE_UINT:
>> +       case GLSL_TYPE_BOOL:
>> +       case GLSL_TYPE_SUBROUTINE:
>> +               return ctx->i32;
>> +       case GLSL_TYPE_FLOAT: /* TODO handle mediump */
>> +               return ctx->f32;
>> +       case GLSL_TYPE_INT64:
>> +       case GLSL_TYPE_UINT64:
>> +               return ctx->i64;
>> +       case GLSL_TYPE_DOUBLE:
>> +               return ctx->f64;
>> +       default:
>> +               unreachable("unknown GLSL type");
>> +       }
>> +}
>> +
>> +static LLVMTypeRef
>> +glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
>> +                 const struct glsl_type *type)
>> +{
>> +       if (glsl_type_is_scalar(type)) {
>> +               return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
>> +       }
>> +
>> +       if (glsl_type_is_vector(type)) {
>> +               return LLVMVectorType(
>> +                  glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
>> +                  glsl_get_vector_elements(type));
>> +       }
>> +
>> +       if (glsl_type_is_matrix(type)) {
>> +               return LLVMArrayType(
>> +                  glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
>> +                  glsl_get_matrix_columns(type));
>> +       }
>> +
>> +       if (glsl_type_is_array(type)) {
>> +               return LLVMArrayType(
>> +                  glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
>> +                  glsl_get_length(type));
>> +       }
>> +
>> +       assert(glsl_type_is_struct(type));
>> +
>> +       LLVMTypeRef member_types[glsl_get_length(type)];
>> +
>> +       for (unsigned i = 0; i < glsl_get_length(type); i++) {
>> +               member_types[i] =
>> +                       glsl_to_llvm_type(ctx,
>> +                                         glsl_get_struct_field(type, i));
>> +       }
>> +
>> +       return LLVMStructTypeInContext(ctx->context, member_types,
>> +                                      glsl_get_length(type), false);
>> +}
>> +
>>  static void
>>  setup_locals(struct nir_to_llvm_context *ctx,
>>              struct nir_function *func)
>>  {
>> -       int i, j;
>> -       ctx->num_locals = 0;
>>         nir_foreach_variable(variable, &func->impl->locals) {
>> -               unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
>> -               variable->data.driver_location = ctx->num_locals * 4;
>> -               ctx->num_locals += attrib_count;
>> -       }
>> -       ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
>> -       if (!ctx->locals)
>> -           return;
>> -
>> -       for (i = 0; i < ctx->num_locals; i++) {
>> -               for (j = 0; j < 4; j++) {
>> -                       ctx->locals[i * 4 + j] =
>> -                               si_build_alloca_undef(ctx, ctx->f32, "temp");
>> -               }
>> +               LLVMValueRef local =
>> +                       si_build_alloca_undef(ctx,
>> +                                             glsl_to_llvm_type(ctx,
>> +                                                               variable->type),
>> +                                             variable->name ? variable->name : "");
>> +               _mesa_hash_table_insert(ctx->vars, variable, local);
>>         }
>>  }
>>
>> @@ -6034,6 +6120,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>                                            _mesa_key_pointer_equal);
>>         ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>>                                            _mesa_key_pointer_equal);
>> +       ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>> +                                            _mesa_key_pointer_equal);
>>
>>         func = (struct nir_function *)exec_list_get_head(&nir->functions);
>>
>> @@ -6048,9 +6136,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>         ac_llvm_finalize_module(&ctx);
>>
>>         ac_nir_eliminate_const_vs_outputs(&ctx);
>> -       free(ctx.locals);
>>         ralloc_free(ctx.defs);
>>         ralloc_free(ctx.phis);
>> +       ralloc_free(ctx.vars);
>>
>>         if (nir->stage == MESA_SHADER_GEOMETRY) {
>>                 unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
>> --
>> 2.9.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list