[Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Thu Jul 6 21:01:23 UTC 2017
On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott <connora at valvesoftware.com> wrote:
> From: Connor Abbott <cwabbott0 at gmail.com>
>
> The old way was very TGSI-based, and couldn't handle indirect
> dereferences at all. Instead, pass through the type information NIR has
I think the old code should handle indirect derefs just fine? See the
indir_index stuff. I'm kind of worried that LLVM doesn't promote
variables with indirect derefs aggressively enough to registers. IIRC
it only converts scalar arrays of up to 4 elements to vectors, while
radv always promotes and IIRC radeonsi promotes for all variables with
something like <= 20 dwords?
> about local variables to LLVM, and translate NIR dereferences directly
> into the equivalent GEP instructions in LLVM.
> ---
> src/amd/common/ac_nir_to_llvm.c | 204 ++++++++++++++++++++++++++++------------
> 1 file changed, 146 insertions(+), 58 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index e72747a..f42d214 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>
> struct hash_table *defs;
> struct hash_table *phis;
> + struct hash_table *vars;
>
> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
> LLVMValueRef ring_offsets;
> @@ -157,8 +158,6 @@ struct nir_to_llvm_context {
> LLVMValueRef shared_memory;
> uint64_t input_mask;
> uint64_t output_mask;
> - int num_locals;
> - LLVMValueRef *locals;
> uint8_t num_output_clips;
> uint8_t num_output_culls;
>
> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> return result;
> }
>
> +static LLVMValueRef
> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
> + nir_deref_var *deref)
> +{
> + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
> + assert(entry->data);
> + LLVMValueRef val = entry->data;
> + nir_deref *tail = deref->deref.child;
> + while (tail != NULL) {
> + LLVMValueRef offset;
> + switch (tail->deref_type) {
> + case nir_deref_type_array: {
> + nir_deref_array *array = nir_deref_as_array(tail);
> + offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
> + if (array->deref_array_type ==
> + nir_deref_array_type_indirect) {
> + offset = LLVMBuildAdd(ctx->builder, offset,
> + get_src(ctx,
> + array->indirect),
> + "");
> + }
> + break;
> + }
> + case nir_deref_type_struct: {
> + nir_deref_struct *deref_struct =
> + nir_deref_as_struct(tail);
> + offset = LLVMConstInt(ctx->i32,
> + deref_struct->index, 0);
> + break;
> + }
> + default:
> + unreachable("bad deref type");
> + }
> + val = ac_build_gep0(&ctx->ac, val, offset);
> + tail = tail->child;
> + }
> + return val;
> +}
> +
> static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
> nir_intrinsic_instr *instr)
> {
> @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
> values[chan] = ctx->inputs[idx + chan + const_index * 4];
> }
> break;
> - case nir_var_local:
> - for (unsigned chan = 0; chan < ve; chan++) {
> - if (indir_index) {
> - unsigned count = glsl_count_attribute_slots(
> - instr->variables[0]->var->type, false);
> - count -= chan / 4;
> - LLVMValueRef tmp_vec = ac_build_gather_values_extended(
> - &ctx->ac, ctx->locals + idx + chan, count,
> - 4, true);
> -
> - values[chan] = LLVMBuildExtractElement(ctx->builder,
> - tmp_vec,
> - indir_index, "");
> - } else {
> - values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
> - }
> - }
> - break;
> + case nir_var_local: {
> + LLVMValueRef address = build_gep_for_deref(ctx,
> + instr->variables[0]);
> + LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
> + return LLVMBuildBitCast(ctx->builder, val,
> + get_def_type(ctx, &instr->dest.ssa),
> + "");
> + }
> case nir_var_shader_out:
> if (ctx->stage == MESA_SHADER_TESS_CTRL)
> return load_tcs_output(ctx, instr);
> @@ -3079,31 +3107,36 @@ visit_store_var(struct nir_to_llvm_context *ctx,
> }
> }
> break;
> - case nir_var_local:
> - for (unsigned chan = 0; chan < 8; chan++) {
> - if (!(writemask & (1 << chan)))
> - continue;
> -
> - value = llvm_extract_elem(ctx, src, chan);
> - if (indir_index) {
> - unsigned count = glsl_count_attribute_slots(
> - instr->variables[0]->var->type, false);
> - count -= chan / 4;
> - LLVMValueRef tmp_vec = ac_build_gather_values_extended(
> - &ctx->ac, ctx->locals + idx + chan, count,
> - 4, true);
> -
> - tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
> - value, indir_index, "");
> - build_store_values_extended(ctx, ctx->locals + idx + chan,
> - count, 4, tmp_vec);
> - } else {
> - temp_ptr = ctx->locals[idx + chan + const_index * 4];
> -
> - LLVMBuildStore(ctx->builder, value, temp_ptr);
> + case nir_var_local: {
> + int writemask = instr->const_index[0];
> + LLVMValueRef address = build_gep_for_deref(ctx,
> + instr->variables[0]);
> + LLVMValueRef val = get_src(ctx, instr->src[0]);
> + unsigned components =
> + glsl_get_vector_elements(
> + nir_deref_tail(&instr->variables[0]->deref)->type);
> + if (writemask == (1 << components) - 1) {
> + val = LLVMBuildBitCast(
> + ctx->builder, val,
> + LLVMGetElementType(LLVMTypeOf(address)), "");
> + LLVMBuildStore(ctx->builder, val, address);
> + } else {
> + for (unsigned chan = 0; chan < 4; chan++) {
> + if (!(writemask & (1 << chan)))
> + continue;
> + LLVMValueRef ptr =
> + LLVMBuildStructGEP(ctx->builder,
> + address, chan, "");
> + LLVMValueRef src = llvm_extract_elem(ctx, val,
> + chan);
> + src = LLVMBuildBitCast(
> + ctx->builder, src,
> + LLVMGetElementType(LLVMTypeOf(ptr)), "");
> + LLVMBuildStore(ctx->builder, src, ptr);
> }
> }
> break;
> + }
> case nir_var_shared: {
> LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
>
> @@ -5005,26 +5038,79 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
> ctx->output_mask |= mask_attribs;
> }
>
> +static LLVMTypeRef
> +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
> + enum glsl_base_type type)
> +{
> + switch (type) {
> + case GLSL_TYPE_INT:
> + case GLSL_TYPE_UINT:
> + case GLSL_TYPE_BOOL:
> + case GLSL_TYPE_SUBROUTINE:
> + return ctx->i32;
> + case GLSL_TYPE_FLOAT: /* TODO handle mediump */
> + return ctx->f32;
> + case GLSL_TYPE_INT64:
> + case GLSL_TYPE_UINT64:
> + return ctx->i64;
> + case GLSL_TYPE_DOUBLE:
> + return ctx->f64;
> + default:
> + unreachable("unknown GLSL type");
> + }
> +}
> +
> +static LLVMTypeRef
> +glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
> + const struct glsl_type *type)
> +{
> + if (glsl_type_is_scalar(type)) {
> + return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
> + }
> +
> + if (glsl_type_is_vector(type)) {
> + return LLVMVectorType(
> + glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
> + glsl_get_vector_elements(type));
> + }
> +
> + if (glsl_type_is_matrix(type)) {
> + return LLVMArrayType(
> + glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
> + glsl_get_matrix_columns(type));
> + }
> +
> + if (glsl_type_is_array(type)) {
> + return LLVMArrayType(
> + glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
> + glsl_get_length(type));
> + }
> +
> + assert(glsl_type_is_struct(type));
> +
> + LLVMTypeRef member_types[glsl_get_length(type)];
> +
> + for (unsigned i = 0; i < glsl_get_length(type); i++) {
> + member_types[i] =
> + glsl_to_llvm_type(ctx,
> + glsl_get_struct_field(type, i));
> + }
> +
> + return LLVMStructTypeInContext(ctx->context, member_types,
> + glsl_get_length(type), false);
> +}
> +
> static void
> setup_locals(struct nir_to_llvm_context *ctx,
> struct nir_function *func)
> {
> - int i, j;
> - ctx->num_locals = 0;
> nir_foreach_variable(variable, &func->impl->locals) {
> - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
> - variable->data.driver_location = ctx->num_locals * 4;
> - ctx->num_locals += attrib_count;
> - }
> - ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
> - if (!ctx->locals)
> - return;
> -
> - for (i = 0; i < ctx->num_locals; i++) {
> - for (j = 0; j < 4; j++) {
> - ctx->locals[i * 4 + j] =
> - si_build_alloca_undef(ctx, ctx->f32, "temp");
> - }
> + LLVMValueRef local =
> + si_build_alloca_undef(ctx,
> + glsl_to_llvm_type(ctx,
> + variable->type),
> + variable->name ? variable->name : "");
> + _mesa_hash_table_insert(ctx->vars, variable, local);
> }
> }
>
> @@ -6034,6 +6120,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
> _mesa_key_pointer_equal);
> ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> _mesa_key_pointer_equal);
> + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
>
> func = (struct nir_function *)exec_list_get_head(&nir->functions);
>
> @@ -6048,9 +6136,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
> ac_llvm_finalize_module(&ctx);
>
> ac_nir_eliminate_const_vs_outputs(&ctx);
> - free(ctx.locals);
> ralloc_free(ctx.defs);
> ralloc_free(ctx.phis);
> + ralloc_free(ctx.vars);
>
> if (nir->stage == MESA_SHADER_GEOMETRY) {
> unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list