[Mesa-dev] [PATCH v2 37/41] radv, ac: implement 16-bit interpolation
Rhys Perry
pendingchaos02 at gmail.com
Sat Feb 16 00:36:20 UTC 2019
This patch can be ignored: it is a stale copy that I forgot to delete, so it was sent along with the rest of the series.
"[PATCH v2 37/41] WIP: radv, ac: implement 16-bit interpolation" is
the correct one.
On Sat, 16 Feb 2019 at 00:23, Rhys Perry <pendingchaos02 at gmail.com> wrote:
>
> v2: add to patch series
>
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
> src/amd/common/ac_llvm_build.c | 33 +++++++++++++++++-------
> src/amd/common/ac_llvm_build.h | 3 ++-
> src/amd/common/ac_nir_to_llvm.c | 14 +++++++---
> src/amd/vulkan/radv_nir_to_llvm.c | 27 ++++++++++++++-----
> src/amd/vulkan/radv_pipeline.c | 19 ++++++++------
> src/amd/vulkan/radv_shader.h | 1 +
> src/gallium/drivers/radeonsi/si_shader.c | 2 +-
> 7 files changed, 69 insertions(+), 30 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index dff369aae7f..be2c2251a21 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -937,27 +937,40 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
> LLVMValueRef attr_number,
> LLVMValueRef params,
> LLVMValueRef i,
> - LLVMValueRef j)
> + LLVMValueRef j,
> + int word)
> {
> - LLVMValueRef args[5];
> + LLVMValueRef args[6];
> LLVMValueRef p1;
>
> args[0] = i;
> args[1] = llvm_chan;
> args[2] = attr_number;
> - args[3] = params;
> -
> - p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
> - ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
> + if (word >= 0) {
> + args[3] = LLVMConstInt(ctx->i1, word, false);
> + args[4] = params;
> + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
> + ctx->f16, args, 5, AC_FUNC_ATTR_READNONE);
> + } else {
> + args[3] = params;
> + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
> + ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
> + }
>
> args[0] = p1;
> args[1] = j;
> args[2] = llvm_chan;
> args[3] = attr_number;
> - args[4] = params;
> -
> - return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
> - ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
> + if (word >= 0) {
> + args[4] = LLVMConstInt(ctx->i1, word, false);
> + args[5] = params;
> + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
> + ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
> + } else {
> + args[4] = params;
> + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
> + ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
> + }
> }
>
> LLVMValueRef
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 61c9b5e4b6c..655427567c4 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -224,7 +224,8 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
> LLVMValueRef attr_number,
> LLVMValueRef params,
> LLVMValueRef i,
> - LLVMValueRef j);
> + LLVMValueRef j,
> + int word);
>
> LLVMValueRef
> ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index bf7024c68e4..939b8eb13de 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3120,8 +3120,15 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
> LLVMValueRef j = LLVMBuildExtractElement(
> ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
>
> + /* This fp16 handling isn't technically correct
> + * but should be correct for the attributes we
> + * are actually going to use. */
> + bool fp16 = instr->dest.ssa.bit_size == 16;
> + int word = fp16 ? 0 : -1;
> v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
> - ctx->abi->prim_mask, i, j);
> + ctx->abi->prim_mask, i, j, word);
> + if (fp16)
> + v = ac_build_reinterpret(&ctx->ac, v, ctx->ac.f32);
> } else {
> v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
> llvm_chan, attr_number, ctx->abi->prim_mask);
> @@ -3134,8 +3141,9 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
> result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, "");
>
> }
> - return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
> - var->data.location_frac);
> + LLVMValueRef ret = ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
> + var->data.location_frac);
> + return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, &instr->dest.ssa));
> }
>
> static void visit_intrinsic(struct ac_nir_context *ctx,
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index c46eabf3656..49f8d35dd5f 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2051,7 +2051,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
> unsigned attr,
> LLVMValueRef interp_param,
> LLVMValueRef prim_mask,
> - LLVMValueRef result[4])
> + LLVMValueRef result[4],
> + bool fp16)
> {
> LLVMValueRef attr_number;
> unsigned chan;
> @@ -2086,7 +2087,10 @@ static void interp_fs_input(struct radv_shader_context *ctx,
> result[chan] = ac_build_fs_interp(&ctx->ac,
> llvm_chan,
> attr_number,
> - prim_mask, i, j);
> + prim_mask, i, j,
> + fp16 ? 0 : -1);
> + if (fp16)
> + result[chan] = ac_build_reinterpret(&ctx->ac, result[chan], ctx->ac.f16);
> } else {
> result[chan] = ac_build_fs_interp_mov(&ctx->ac,
> LLVMConstInt(ctx->ac.i32, 2, false),
> @@ -2100,7 +2104,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
>
> static void
> handle_fs_input_decl(struct radv_shader_context *ctx,
> - struct nir_variable *variable)
> + struct nir_variable *variable,
> + uint64_t *fp16_mask)
> {
> int idx = variable->data.location;
> unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
> @@ -2110,7 +2115,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
> variable->data.driver_location = idx * 4;
> mask = ((1ull << attrib_count) - 1) << variable->data.location;
>
> - if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
> + enum glsl_base_type type = glsl_get_base_type(glsl_without_array(variable->type));
> + if (type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_FLOAT16) {
> unsigned interp_type;
> if (variable->data.sample)
> interp_type = INTERP_SAMPLE;
> @@ -2120,6 +2126,9 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
> interp_type = INTERP_CENTER;
>
> interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
> +
> + if (type == GLSL_TYPE_FLOAT16)
> + *fp16_mask |= mask;
> }
>
> for (unsigned i = 0; i < attrib_count; ++i)
> @@ -2173,8 +2182,9 @@ handle_fs_inputs(struct radv_shader_context *ctx,
> {
> prepare_interp_optimize(ctx, nir);
>
> + uint64_t fp16_mask = 0;
> nir_foreach_variable(variable, &nir->inputs)
> - handle_fs_input_decl(ctx, variable);
> + handle_fs_input_decl(ctx, variable, &fp16_mask);
>
> unsigned index = 0;
>
> @@ -2194,11 +2204,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
> if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
> i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
> interp_param = *inputs;
> + bool fp16 = fp16_mask & (1ull << i);
> interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
> - inputs);
> + inputs, fp16);
>
> if (!interp_param)
> ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
> + if (fp16)
> + ctx->shader_info->fs.fp16_mask |= 1u << index;
> if (i >= VARYING_SLOT_VAR0)
> ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
> ++index;
> @@ -2210,7 +2223,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
>
> interp_param = *inputs;
> interp_fs_input(ctx, index, interp_param,
> - ctx->abi.prim_mask, inputs);
> + ctx->abi.prim_mask, inputs, false);
> ++index;
> }
> } else if (i == VARYING_SLOT_POS) {
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index ab56a273a2c..a3260291bce 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -3070,13 +3070,15 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
> radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
> }
>
> -static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
> +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool fp16)
> {
> uint32_t ps_input_cntl;
> if (offset <= AC_EXP_PARAM_OFFSET_31) {
> ps_input_cntl = S_028644_OFFSET(offset);
> if (flat_shade)
> ps_input_cntl |= S_028644_FLAT_SHADE(1);
> + if (fp16 && !flat_shade)
> + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1);
> } else {
> /* The input is a DEFAULT_VAL constant. */
> assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
> @@ -3101,7 +3103,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
> if (ps->info.info.ps.prim_id_input) {
> unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
> if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
> - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
> + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
> ++ps_offset;
> }
> }
> @@ -3111,9 +3113,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
> ps->info.info.needs_multiview_view_index) {
> unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
> if (vs_offset != AC_EXP_PARAM_UNDEFINED)
> - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
> + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
> else
> - ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
> + ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
> ++ps_offset;
> }
>
> @@ -3129,21 +3131,21 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
>
> vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
> if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
> - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
> + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
> ++ps_offset;
> }
>
> vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
> if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
> ps->info.info.ps.num_input_clips_culls > 4) {
> - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
> + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
> ++ps_offset;
> }
> }
>
> for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
> unsigned vs_offset;
> - bool flat_shade;
> + bool flat_shade, fp16;
> if (!(ps->info.fs.input_mask & (1u << i)))
> continue;
>
> @@ -3155,8 +3157,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
> }
>
> flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
> + fp16 = !!(ps->info.fs.fp16_mask & (1u << ps_offset));
>
> - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
> + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, fp16);
> ++ps_offset;
> }
>
> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
> index b67cd2b4f15..f0e9bc249f9 100644
> --- a/src/amd/vulkan/radv_shader.h
> +++ b/src/amd/vulkan/radv_shader.h
> @@ -257,6 +257,7 @@ struct radv_shader_variant_info {
> unsigned num_interp;
> uint32_t input_mask;
> uint32_t flat_shaded_mask;
> + uint32_t fp16_mask;
> bool can_discard;
> bool early_fragment_test;
> } fs;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index efae02ee91c..c1f82137020 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1751,7 +1751,7 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
> return ac_build_fs_interp(&ctx->ac,
> LLVMConstInt(ctx->i32, chan, 0),
> LLVMConstInt(ctx->i32, attr_index, 0),
> - prim_mask, i, j);
> + prim_mask, i, j, -1);
> }
> return ac_build_fs_interp_mov(&ctx->ac,
> LLVMConstInt(ctx->i32, 2, 0), /* P0 */
> --
> 2.20.1
>
More information about the mesa-dev
mailing list