[Mesa-dev] [PATCH v2 37/41] radv, ac: implement 16-bit interpolation

Rhys Perry pendingchaos02 at gmail.com
Sat Feb 16 00:36:20 UTC 2019


Please ignore this patch: I forgot to delete it before sending the series,
so it was sent by mistake. The correct patch to review is
"[PATCH v2 37/41] WIP: radv, ac: implement 16-bit interpolation".

On Sat, 16 Feb 2019 at 00:23, Rhys Perry <pendingchaos02 at gmail.com> wrote:
>
> v2: add to patch series
>
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
>  src/amd/common/ac_llvm_build.c           | 33 +++++++++++++++++-------
>  src/amd/common/ac_llvm_build.h           |  3 ++-
>  src/amd/common/ac_nir_to_llvm.c          | 14 +++++++---
>  src/amd/vulkan/radv_nir_to_llvm.c        | 27 ++++++++++++++-----
>  src/amd/vulkan/radv_pipeline.c           | 19 ++++++++------
>  src/amd/vulkan/radv_shader.h             |  1 +
>  src/gallium/drivers/radeonsi/si_shader.c |  2 +-
>  7 files changed, 69 insertions(+), 30 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index dff369aae7f..be2c2251a21 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -937,27 +937,40 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
>                    LLVMValueRef attr_number,
>                    LLVMValueRef params,
>                    LLVMValueRef i,
> -                  LLVMValueRef j)
> +                  LLVMValueRef j,
> +                  int word)
>  {
> -       LLVMValueRef args[5];
> +       LLVMValueRef args[6];
>         LLVMValueRef p1;
>
>         args[0] = i;
>         args[1] = llvm_chan;
>         args[2] = attr_number;
> -       args[3] = params;
> -
> -       p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
> -                               ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
> +       if (word >= 0) {
> +               args[3] = LLVMConstInt(ctx->i1, word, false);
> +               args[4] = params;
> +               p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
> +                                       ctx->f16, args, 5, AC_FUNC_ATTR_READNONE);
> +       } else {
> +               args[3] = params;
> +               p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
> +                                       ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
> +       }
>
>         args[0] = p1;
>         args[1] = j;
>         args[2] = llvm_chan;
>         args[3] = attr_number;
> -       args[4] = params;
> -
> -       return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
> -                                 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
> +       if (word >= 0) {
> +               args[4] = LLVMConstInt(ctx->i1, word, false);
> +               args[5] = params;
> +               return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
> +                                         ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
> +       } else {
> +               args[4] = params;
> +               return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
> +                                         ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
> +       }
>  }
>
>  LLVMValueRef
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 61c9b5e4b6c..655427567c4 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -224,7 +224,8 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
>                    LLVMValueRef attr_number,
>                    LLVMValueRef params,
>                    LLVMValueRef i,
> -                  LLVMValueRef j);
> +                  LLVMValueRef j,
> +                  int word);
>
>  LLVMValueRef
>  ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index bf7024c68e4..939b8eb13de 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3120,8 +3120,15 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
>                                 LLVMValueRef j = LLVMBuildExtractElement(
>                                         ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
>
> +                               /* This fp16 handling isn't technically correct
> +                                * but should be correct for the attributes we
> +                                * are actually going to use. */
> +                               bool fp16 = instr->dest.ssa.bit_size == 16;
> +                               int word = fp16 ? 0 : -1;
>                                 v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
> -                                                      ctx->abi->prim_mask, i, j);
> +                                                      ctx->abi->prim_mask, i, j, word);
> +                               if (fp16)
> +                                       v = ac_build_reinterpret(&ctx->ac, v, ctx->ac.f32);
>                         } else {
>                                 v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
>                                                            llvm_chan, attr_number, ctx->abi->prim_mask);
> @@ -3134,8 +3141,9 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
>                 result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, "");
>
>         }
> -       return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
> -                                             var->data.location_frac);
> +       LLVMValueRef ret = ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
> +                                                         var->data.location_frac);
> +       return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, &instr->dest.ssa));
>  }
>
>  static void visit_intrinsic(struct ac_nir_context *ctx,
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index c46eabf3656..49f8d35dd5f 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2051,7 +2051,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
>                             unsigned attr,
>                             LLVMValueRef interp_param,
>                             LLVMValueRef prim_mask,
> -                           LLVMValueRef result[4])
> +                           LLVMValueRef result[4],
> +                           bool fp16)
>  {
>         LLVMValueRef attr_number;
>         unsigned chan;
> @@ -2086,7 +2087,10 @@ static void interp_fs_input(struct radv_shader_context *ctx,
>                         result[chan] = ac_build_fs_interp(&ctx->ac,
>                                                           llvm_chan,
>                                                           attr_number,
> -                                                         prim_mask, i, j);
> +                                                         prim_mask, i, j,
> +                                                         fp16 ? 0 : -1);
> +                       if (fp16)
> +                               result[chan] = ac_build_reinterpret(&ctx->ac, result[chan], ctx->ac.f16);
>                 } else {
>                         result[chan] = ac_build_fs_interp_mov(&ctx->ac,
>                                                               LLVMConstInt(ctx->ac.i32, 2, false),
> @@ -2100,7 +2104,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
>
>  static void
>  handle_fs_input_decl(struct radv_shader_context *ctx,
> -                    struct nir_variable *variable)
> +                    struct nir_variable *variable,
> +                    uint64_t *fp16_mask)
>  {
>         int idx = variable->data.location;
>         unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
> @@ -2110,7 +2115,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
>         variable->data.driver_location = idx * 4;
>         mask = ((1ull << attrib_count) - 1) << variable->data.location;
>
> -       if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
> +       enum glsl_base_type type = glsl_get_base_type(glsl_without_array(variable->type));
> +       if (type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_FLOAT16) {
>                 unsigned interp_type;
>                 if (variable->data.sample)
>                         interp_type = INTERP_SAMPLE;
> @@ -2120,6 +2126,9 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
>                         interp_type = INTERP_CENTER;
>
>                 interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
> +
> +               if (type == GLSL_TYPE_FLOAT16)
> +                       *fp16_mask |= mask;
>         }
>
>         for (unsigned i = 0; i < attrib_count; ++i)
> @@ -2173,8 +2182,9 @@ handle_fs_inputs(struct radv_shader_context *ctx,
>  {
>         prepare_interp_optimize(ctx, nir);
>
> +       uint64_t fp16_mask = 0;
>         nir_foreach_variable(variable, &nir->inputs)
> -               handle_fs_input_decl(ctx, variable);
> +               handle_fs_input_decl(ctx, variable, &fp16_mask);
>
>         unsigned index = 0;
>
> @@ -2194,11 +2204,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
>                 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
>                     i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
>                         interp_param = *inputs;
> +                       bool fp16 = fp16_mask & (1ull << i);
>                         interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
> -                                       inputs);
> +                                       inputs, fp16);
>
>                         if (!interp_param)
>                                 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
> +                       if (fp16)
> +                               ctx->shader_info->fs.fp16_mask |= 1u << index;
>                         if (i >= VARYING_SLOT_VAR0)
>                                 ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
>                         ++index;
> @@ -2210,7 +2223,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
>
>                                 interp_param = *inputs;
>                                 interp_fs_input(ctx, index, interp_param,
> -                                               ctx->abi.prim_mask, inputs);
> +                                               ctx->abi.prim_mask, inputs, false);
>                                 ++index;
>                         }
>                 } else if (i == VARYING_SLOT_POS) {
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index ab56a273a2c..a3260291bce 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -3070,13 +3070,15 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
>         radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
>  }
>
> -static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
> +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool fp16)
>  {
>         uint32_t ps_input_cntl;
>         if (offset <= AC_EXP_PARAM_OFFSET_31) {
>                 ps_input_cntl = S_028644_OFFSET(offset);
>                 if (flat_shade)
>                         ps_input_cntl |= S_028644_FLAT_SHADE(1);
> +               if (fp16 && !flat_shade)
> +                       ps_input_cntl |= S_028644_FP16_INTERP_MODE(1);
>         } else {
>                 /* The input is a DEFAULT_VAL constant. */
>                 assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
> @@ -3101,7 +3103,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
>         if (ps->info.info.ps.prim_id_input) {
>                 unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
>                 if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
> -                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
> +                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
>                         ++ps_offset;
>                 }
>         }
> @@ -3111,9 +3113,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
>             ps->info.info.needs_multiview_view_index) {
>                 unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
>                 if (vs_offset != AC_EXP_PARAM_UNDEFINED)
> -                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
> +                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
>                 else
> -                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
> +                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
>                 ++ps_offset;
>         }
>
> @@ -3129,21 +3131,21 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
>
>                 vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
>                 if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
> -                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
> +                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
>                         ++ps_offset;
>                 }
>
>                 vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
>                 if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
>                     ps->info.info.ps.num_input_clips_culls > 4) {
> -                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
> +                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
>                         ++ps_offset;
>                 }
>         }
>
>         for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
>                 unsigned vs_offset;
> -               bool flat_shade;
> +               bool flat_shade, fp16;
>                 if (!(ps->info.fs.input_mask & (1u << i)))
>                         continue;
>
> @@ -3155,8 +3157,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
>                 }
>
>                 flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
> +               fp16 = !!(ps->info.fs.fp16_mask & (1u << ps_offset));
>
> -               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
> +               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, fp16);
>                 ++ps_offset;
>         }
>
> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
> index b67cd2b4f15..f0e9bc249f9 100644
> --- a/src/amd/vulkan/radv_shader.h
> +++ b/src/amd/vulkan/radv_shader.h
> @@ -257,6 +257,7 @@ struct radv_shader_variant_info {
>                         unsigned num_interp;
>                         uint32_t input_mask;
>                         uint32_t flat_shaded_mask;
> +                       uint32_t fp16_mask;
>                         bool can_discard;
>                         bool early_fragment_test;
>                 } fs;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index efae02ee91c..c1f82137020 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1751,7 +1751,7 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
>                 return ac_build_fs_interp(&ctx->ac,
>                                           LLVMConstInt(ctx->i32, chan, 0),
>                                           LLVMConstInt(ctx->i32, attr_index, 0),
> -                                         prim_mask, i, j);
> +                                         prim_mask, i, j, -1);
>         }
>         return ac_build_fs_interp_mov(&ctx->ac,
>                                       LLVMConstInt(ctx->i32, 2, 0), /* P0 */
> --
> 2.20.1
>


More information about the mesa-dev mailing list