[Mesa-dev] [PATCH V2 4/7] ac: add support for explicit component packing

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Oct 24 23:27:24 UTC 2017


On Wed, Oct 25, 2017 at 1:04 AM, Timothy Arceri <tarceri at itsqueeze.com> wrote:
> This is needed for RADV to support explicit component packing.
>
> This is also required to use the new NIR component splitting /
> packing passes.
>
> V2:
>  - add component packing support for interpolate_at* intrinsics
>  - improve store packing support when not all varyings are scalar
>    as spotted by Bas the store source was incorrectly offset.
> ---
>  src/amd/common/ac_nir_to_llvm.c | 68 +++++++++++++++++++++++++++++++----------
>  1 file changed, 52 insertions(+), 16 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2e50e50b12..5d9c5be7d2 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1060,21 +1060,20 @@ static int get_llvm_num_components(LLVMValueRef value)
>                                       : 1;
>         return num_components;
>  }
>
>  static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
>                                       LLVMValueRef value,
>                                       int index)
>  {
>         int count = get_llvm_num_components(value);
>
> -       assert(index < count);
>         if (count == 1)
>                 return value;
>
>         return LLVMBuildExtractElement(ac->builder, value,
>                                        LLVMConstInt(ac->i32, index, false), "");
>  }
>
>  static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
>                                  LLVMValueRef value, unsigned count)
>  {
> @@ -2811,20 +2810,42 @@ get_dw_address(struct nir_to_llvm_context *ctx,
>         dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>                                LLVMConstInt(ctx->i32, param * 4, false), "");
>
>         if (const_index && compact_const_index)
>                 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>                                        LLVMConstInt(ctx->i32, const_index, false), "");
>         return dw_addr;
>  }
>
>  static LLVMValueRef
> +build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
> +                           unsigned value_count, unsigned component)
> +{
> +       LLVMValueRef vec = NULL;
> +
> +       if (value_count == 1) {
> +               return values[component];
> +       } else if (!value_count)
> +               unreachable("value_count is 0");
> +
> +       for (unsigned i = component; i < value_count + component; i++) {
> +               LLVMValueRef value = values[i];
> +
> +               if (!i)
> +                       vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
> +               LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);

Doesn't this insert index need to be i - component, so that it covers the range [0, value_count)?

Otherwise

Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>



> +               vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
> +       }
> +       return vec;
> +}
> +
> +static LLVMValueRef
>  load_tcs_input(struct nir_to_llvm_context *ctx,
>                nir_intrinsic_instr *instr)
>  {
>         LLVMValueRef dw_addr, stride;
>         unsigned const_index;
>         LLVMValueRef vertex_index;
>         LLVMValueRef indir_index;
>         unsigned param;
>         LLVMValueRef value[4], result;
>         const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
> @@ -2832,26 +2853,27 @@ load_tcs_input(struct nir_to_llvm_context *ctx,
>         param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
>         get_deref_offset(ctx->nir, instr->variables[0],
>                          false, NULL, per_vertex ? &vertex_index : NULL,
>                          &const_index, &indir_index);
>
>         stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
>         dw_addr = get_tcs_in_current_patch_offset(ctx);
>         dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
>                                  indir_index);
>
> -       for (unsigned i = 0; i < instr->num_components; i++) {
> +       unsigned comp = instr->variables[0]->var->data.location_frac;
> +       for (unsigned i = 0; i < instr->num_components + comp; i++) {
>                 value[i] = lds_load(ctx, dw_addr);
>                 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>                                        ctx->i32one, "");
>         }
> -       result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
> +       result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
>         result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
>         return result;
>  }
>
>  static LLVMValueRef
>  load_tcs_output(struct nir_to_llvm_context *ctx,
>                nir_intrinsic_instr *instr)
>  {
>         LLVMValueRef dw_addr;
>         LLVMValueRef stride = NULL;
> @@ -2870,43 +2892,45 @@ load_tcs_output(struct nir_to_llvm_context *ctx,
>         if (!instr->variables[0]->var->data.patch) {
>                 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
>                 dw_addr = get_tcs_out_current_patch_offset(ctx);
>         } else {
>                 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
>         }
>
>         dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
>                                  indir_index);
>
> -       for (unsigned i = 0; i < instr->num_components; i++) {
> +       unsigned comp = instr->variables[0]->var->data.location_frac;
> +       for (unsigned i = comp; i < instr->num_components + comp; i++) {
>                 value[i] = lds_load(ctx, dw_addr);
>                 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>                                        ctx->i32one, "");
>         }
> -       result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
> +       result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
>         result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
>         return result;
>  }
>
>  static void
>  store_tcs_output(struct nir_to_llvm_context *ctx,
>                  nir_intrinsic_instr *instr,
>                  LLVMValueRef src,
>                  unsigned writemask)
>  {
>         LLVMValueRef dw_addr;
>         LLVMValueRef stride = NULL;
>         LLVMValueRef buf_addr = NULL;
>         LLVMValueRef vertex_index = NULL;
>         LLVMValueRef indir_index = NULL;
>         unsigned const_index = 0;
>         unsigned param;
> +       const unsigned comp = instr->variables[0]->var->data.location_frac;
>         const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
>         const bool is_compact = instr->variables[0]->var->data.compact;
>
>         get_deref_offset(ctx->nir, instr->variables[0],
>                          false, NULL, per_vertex ? &vertex_index : NULL,
>                          &const_index, &indir_index);
>
>         param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
>         if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
>             is_compact && const_index > 3) {
> @@ -2930,21 +2954,21 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
>
>         bool is_tess_factor = false;
>         if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
>             instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
>                 is_tess_factor = true;
>
>         unsigned base = is_compact ? const_index : 0;
>         for (unsigned chan = 0; chan < 8; chan++) {
>                 if (!(writemask & (1 << chan)))
>                         continue;
> -               LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan);
> +               LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - comp);
>
>                 lds_store(ctx, dw_addr, value);
>
>                 if (!is_tess_factor && writemask != 0xF)
>                         ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
>                                                     buf_addr, ctx->oc_lds,
>                                                     4 * (base + chan), 1, 0, true, false);
>
>                 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>                                        ctx->i32one, "");
> @@ -2972,23 +2996,28 @@ load_tes_input(struct nir_to_llvm_context *ctx,
>
>         get_deref_offset(ctx->nir, instr->variables[0],
>                          false, NULL, per_vertex ? &vertex_index : NULL,
>                          &const_index, &indir_index);
>         param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
>         if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
>             is_compact && const_index > 3) {
>                 const_index -= 3;
>                 param++;
>         }
> +
> +       unsigned comp = instr->variables[0]->var->data.location_frac;
>         buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
>                                                      is_compact, vertex_index, indir_index);
>
> +       LLVMValueRef comp_offset = LLVMConstInt(ctx->i32, comp * 4, false);
> +       buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
> +
>         result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
>                                       buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
>         result = trim_vector(&ctx->ac, result, instr->num_components);
>         result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
>         return result;
>  }
>
>  static LLVMValueRef
>  load_gs_input(struct nir_to_llvm_context *ctx,
>               nir_intrinsic_instr *instr)
> @@ -3001,21 +3030,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>         unsigned vertex_index;
>         get_deref_offset(ctx->nir, instr->variables[0],
>                          false, &vertex_index, NULL,
>                          &const_index, &indir_index);
>         vtx_offset_param = vertex_index;
>         assert(vtx_offset_param < 6);
>         vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
>                                   LLVMConstInt(ctx->i32, 4, false), "");
>
>         param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
> -       for (unsigned i = 0; i < instr->num_components; i++) {
> +
> +       unsigned comp = instr->variables[0]->var->data.location_frac;
> +       for (unsigned i = comp; i < instr->num_components + comp; i++) {
>                 if (ctx->ac.chip_class >= GFX9) {
>                         LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
>                         dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
>                                                LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
>                         value[i] = lds_load(ctx, dw_addr);
>                 } else {
>                         args[0] = ctx->esgs_ring;
>                         args[1] = vtx_offset;
>                         args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
>                         args[3] = ctx->i32zero;
> @@ -3024,21 +3055,21 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>                         args[6] = ctx->i32one; /* GLC */
>                         args[7] = ctx->i32zero; /* SLC */
>                         args[8] = ctx->i32zero; /* TFE */
>
>                         value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
>                                                       ctx->i32, args, 9,
>                                                       AC_FUNC_ATTR_READONLY |
>                                                       AC_FUNC_ATTR_LEGACY);
>                 }
>         }
> -       result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
> +       result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
>
>         return result;
>  }
>
>  static LLVMValueRef
>  build_gep_for_deref(struct ac_nir_context *ctx,
>                     nir_deref_var *deref)
>  {
>         struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
>         assert(entry->data);
> @@ -3074,41 +3105,43 @@ build_gep_for_deref(struct ac_nir_context *ctx,
>         }
>         return val;
>  }
>
>  static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
>                                    nir_intrinsic_instr *instr)
>  {
>         LLVMValueRef values[8];
>         int idx = instr->variables[0]->var->data.driver_location;
>         int ve = instr->dest.ssa.num_components;
> +       unsigned comp = instr->variables[0]->var->data.location_frac;
>         LLVMValueRef indir_index;
>         LLVMValueRef ret;
>         unsigned const_index;
>         bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
>                      instr->variables[0]->var->data.mode == nir_var_shader_in;
>         get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
>                                       &const_index, &indir_index);
>
>         if (instr->dest.ssa.bit_size == 64)
>                 ve *= 2;
>
>         switch (instr->variables[0]->var->data.mode) {
>         case nir_var_shader_in:
>                 if (ctx->stage == MESA_SHADER_TESS_CTRL)
>                         return load_tcs_input(ctx->nctx, instr);
>                 if (ctx->stage == MESA_SHADER_TESS_EVAL)
>                         return load_tes_input(ctx->nctx, instr);
>                 if (ctx->stage == MESA_SHADER_GEOMETRY) {
>                         return load_gs_input(ctx->nctx, instr);
>                 }
> -               for (unsigned chan = 0; chan < ve; chan++) {
> +
> +               for (unsigned chan = comp; chan < ve + comp; chan++) {
>                         if (indir_index) {
>                                 unsigned count = glsl_count_attribute_slots(
>                                                 instr->variables[0]->var->type,
>                                                 ctx->stage == MESA_SHADER_VERTEX);
>                                 count -= chan / 4;
>                                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>                                                 &ctx->ac, ctx->abi->inputs + idx + chan, count,
>                                                 4, false, true);
>
>                                 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
> @@ -3140,21 +3173,22 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
>                 LLVMValueRef address = build_gep_for_deref(ctx,
>                                                            instr->variables[0]);
>                 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
>                 return LLVMBuildBitCast(ctx->ac.builder, val,
>                                         get_def_type(ctx, &instr->dest.ssa),
>                                         "");
>         }
>         case nir_var_shader_out:
>                 if (ctx->stage == MESA_SHADER_TESS_CTRL)
>                         return load_tcs_output(ctx->nctx, instr);
> -               for (unsigned chan = 0; chan < ve; chan++) {
> +
> +               for (unsigned chan = comp; chan < ve + comp; chan++) {
>                         if (indir_index) {
>                                 unsigned count = glsl_count_attribute_slots(
>                                                 instr->variables[0]->var->type, false);
>                                 count -= chan / 4;
>                                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>                                                 &ctx->ac, ctx->outputs + idx + chan, count,
>                                                 4, true, true);
>
>                                 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
>                                                                        tmp_vec,
> @@ -3162,32 +3196,33 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
>                         } else {
>                                 values[chan] = LLVMBuildLoad(ctx->ac.builder,
>                                                      ctx->outputs[idx + chan + const_index * 4],
>                                                      "");
>                         }
>                 }
>                 break;
>         default:
>                 unreachable("unhandle variable mode");
>         }
> -       ret = ac_build_gather_values(&ctx->ac, values, ve);
> +       ret = build_varying_gather_values(&ctx->ac, values, ve, comp);
>         return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
>  }
>
>  static void
>  visit_store_var(struct ac_nir_context *ctx,
>                 nir_intrinsic_instr *instr)
>  {
>         LLVMValueRef temp_ptr, value;
>         int idx = instr->variables[0]->var->data.driver_location;
> +       unsigned comp = instr->variables[0]->var->data.location_frac;
>         LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
> -       int writemask = instr->const_index[0];
> +       int writemask = instr->const_index[0] << comp;
>         LLVMValueRef indir_index;
>         unsigned const_index;
>         get_deref_offset(ctx, instr->variables[0], false,
>                          NULL, NULL, &const_index, &indir_index);
>
>         if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
>                 int old_writemask = writemask;
>
>                 src = LLVMBuildBitCast(ctx->ac.builder, src,
>                                        LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2),
> @@ -3206,21 +3241,21 @@ visit_store_var(struct ac_nir_context *ctx,
>                 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
>                         store_tcs_output(ctx->nctx, instr, src, writemask);
>                         return;
>                 }
>
>                 for (unsigned chan = 0; chan < 8; chan++) {
>                         int stride = 4;
>                         if (!(writemask & (1 << chan)))
>                                 continue;
>
> -                       value = llvm_extract_elem(&ctx->ac, src, chan);
> +                       value = llvm_extract_elem(&ctx->ac, src, chan - comp);
>
>                         if (instr->variables[0]->var->data.compact)
>                                 stride = 1;
>                         if (indir_index) {
>                                 unsigned count = glsl_count_attribute_slots(
>                                                 instr->variables[0]->var->type, false);
>                                 count -= chan / 4;
>                                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>                                                 &ctx->ac, ctx->outputs + idx + chan, count,
>                                                 stride, true, true);
> @@ -3907,21 +3942,21 @@ static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
>         LLVMValueRef values[2];
>
>         values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
>         values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
>         return ac_build_gather_values(&ctx->ac, values, 2);
>  }
>
>  static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
>                                  const nir_intrinsic_instr *instr)
>  {
> -       LLVMValueRef result[2];
> +       LLVMValueRef result[4];
>         LLVMValueRef interp_param, attr_number;
>         unsigned location;
>         unsigned chan;
>         LLVMValueRef src_c0 = NULL;
>         LLVMValueRef src_c1 = NULL;
>         LLVMValueRef src0 = NULL;
>         int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
>         switch (instr->intrinsic) {
>         case nir_intrinsic_interp_var_at_centroid:
>                 location = INTERP_CENTROID;
> @@ -3985,42 +4020,43 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
>                         temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
>                         temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
>
>                         ij_out[i] = LLVMBuildBitCast(ctx->builder,
>                                                      temp2, ctx->i32, "");
>                 }
>                 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
>
>         }
>
> -       for (chan = 0; chan < 2; chan++) {
> +       for (chan = 0; chan < 4; chan++) {
>                 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
>
>                 if (interp_param) {
>                         interp_param = LLVMBuildBitCast(ctx->builder,
>                                                         interp_param, LLVMVectorType(ctx->f32, 2), "");
>                         LLVMValueRef i = LLVMBuildExtractElement(
>                                 ctx->builder, interp_param, ctx->i32zero, "");
>                         LLVMValueRef j = LLVMBuildExtractElement(
>                                 ctx->builder, interp_param, ctx->i32one, "");
>
>                         result[chan] = ac_build_fs_interp(&ctx->ac,
>                                                           llvm_chan, attr_number,
>                                                           ctx->prim_mask, i, j);
>                 } else {
>                         result[chan] = ac_build_fs_interp_mov(&ctx->ac,
>                                                               LLVMConstInt(ctx->i32, 2, false),
>                                                               llvm_chan, attr_number,
>                                                               ctx->prim_mask);
>                 }
>         }
> -       return ac_build_gather_values(&ctx->ac, result, 2);
> +       return build_varying_gather_values(&ctx->ac, result, instr->num_components,
> +                                          instr->variables[0]->var->data.location_frac);
>  }
>
>  static void
>  visit_emit_vertex(struct nir_to_llvm_context *ctx,
>                   const nir_intrinsic_instr *instr)
>  {
>         LLVMValueRef gs_next_vertex;
>         LLVMValueRef can_emit, kill;
>         int idx;
>
> --
> 2.13.6
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list