[Mesa-dev] [PATCH] radeonsi: handle big number of immediates dynamically

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Jan 11 12:45:18 UTC 2017


On 01/11/2017 12:47 PM, Marek Olšák wrote:
> We don't have to use lp_build_tgsi_soa_context::immediates at all, just
> as we don't use lp_build_tgsi_soa_context::temps.

Do you want to always allocate the array of immediates dynamically, and use
ctx->imms/ctx->imms_count like we currently do for temps?

>
> Marek
>
> On Wed, Jan 11, 2017 at 11:51 AM, Grazvydas Ignotas <notasas at gmail.com> wrote:
>> On Tue, Jan 10, 2017 at 7:33 PM, Samuel Pitoiset
>> <samuel.pitoiset at gmail.com> wrote:
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
>>> index 3e0f7c4f76..3cd87f2f66 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
>>> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
>>> @@ -677,14 +677,14 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
>>>                 if (tgsi_type_is_64bit(type)) {
>>>                         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
>>>                         result = LLVMConstInsertElement(result,
>>> -                                                       bld->immediates[reg->Register.Index][swizzle],
>>> +                                                       si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle),
>>>                                                         bld_base->int_bld.zero);
>>>                         result = LLVMConstInsertElement(result,
>>> -                                                       bld->immediates[reg->Register.Index][swizzle + 1],
>>> +                                                       si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle + 1),
>>>                                                         bld_base->int_bld.one);
>>>                         return LLVMConstBitCast(result, ctype);
>>>                 } else {
>>> -                       return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
>>> +                       return LLVMConstBitCast(si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle), ctype);
>>>                 }
>>>         }
>>>
>>> @@ -1230,13 +1230,28 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
>>>         struct si_shader_context *ctx = si_shader_context(bld_base);
>>>
>>>         for (i = 0; i < 4; ++i) {
>>> -               ctx->soa.immediates[ctx->soa.num_immediates][i] =
>>> -                               LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
>>> +               LLVMValueRef value =
>>> +                       LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false);
>>> +               if (!ctx->imms_array) {
>>> +                       ctx->soa.immediates[ctx->soa.num_immediates][i] = value;
>>> +               } else {
>>> +                       ctx->imms_array[ctx->soa.num_immediates * 4 + i] = value;
>>> +               }
>>>         }
>>>
>>>         ctx->soa.num_immediates++;
>>>  }
>>>
>>> +LLVMValueRef si_llvm_get_immediate(struct lp_build_tgsi_context *bld_base,
>>> +                                  int index, int channel)
>>> +{
>>> +       struct si_shader_context *ctx = si_shader_context(bld_base);
>>> +
>>> +       if (!ctx->imms_array)
>>> +               return ctx->soa.immediates[index][channel];
>>> +       return ctx->imms_array[index * 4 + channel];
>>> +}
>>> +
>>>  void si_llvm_context_init(struct si_shader_context *ctx,
>>>                           struct si_screen *sscreen,
>>>                           struct si_shader *shader,
>>> @@ -1281,6 +1296,16 @@ void si_llvm_context_init(struct si_shader_context *ctx,
>>>                                          ctx->temp_arrays);
>>>         }
>>>
>>> +       if (info &&
>>> +           info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES) {
>>> +               int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
>>> +
>>> +               /* Use a dynamically allocated array for immediates when their
>>> +                * number is too great, but only in certain situations for
>>> +                * performance reasons because static allocation is better. */
>>> +               ctx->imms_array = CALLOC(size * 4, sizeof(ctx->imms_array[0]));
>>> +       }
>>
>> Would it make sense to add something like this here:
>>
>>     else
>>         ctx->imms_array = ctx->soa.immediates;
>>
>> and then, when freeing:
>>     if (ctx->imms_array != ctx->soa.immediates)
>>         FREE(ctx->imms_array);
>>
>> to avoid all the conditionals around usual imms_array use?
>>
>> Gražvydas
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list