[Mesa-dev] [PATCH v2 37/41] radv, ac: implement 16-bit interpolation
Rhys Perry
pendingchaos02 at gmail.com
Sat Feb 16 00:22:26 UTC 2019
v2: add to patch series
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
src/amd/common/ac_llvm_build.c | 33 +++++++++++++++++-------
src/amd/common/ac_llvm_build.h | 3 ++-
src/amd/common/ac_nir_to_llvm.c | 14 +++++++---
src/amd/vulkan/radv_nir_to_llvm.c | 27 ++++++++++++++-----
src/amd/vulkan/radv_pipeline.c | 19 ++++++++------
src/amd/vulkan/radv_shader.h | 1 +
src/gallium/drivers/radeonsi/si_shader.c | 2 +-
7 files changed, 69 insertions(+), 30 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index dff369aae7f..be2c2251a21 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -937,27 +937,40 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
LLVMValueRef attr_number,
LLVMValueRef params,
LLVMValueRef i,
- LLVMValueRef j)
+ LLVMValueRef j,
+ int word)
{
- LLVMValueRef args[5];
+ LLVMValueRef args[6];
LLVMValueRef p1;
args[0] = i;
args[1] = llvm_chan;
args[2] = attr_number;
- args[3] = params;
-
- p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
- ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+ if (word >= 0) {
+ args[3] = LLVMConstInt(ctx->i1, word, false);
+ args[4] = params;
+ p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
+ ctx->f16, args, 5, AC_FUNC_ATTR_READNONE);
+ } else {
+ args[3] = params;
+ p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
+ ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+ }
args[0] = p1;
args[1] = j;
args[2] = llvm_chan;
args[3] = attr_number;
- args[4] = params;
-
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
- ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+ if (word >= 0) {
+ args[4] = LLVMConstInt(ctx->i1, word, false);
+ args[5] = params;
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
+ ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
+ } else {
+ args[4] = params;
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
+ ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+ }
}
LLVMValueRef
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 61c9b5e4b6c..655427567c4 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -224,7 +224,8 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
LLVMValueRef attr_number,
LLVMValueRef params,
LLVMValueRef i,
- LLVMValueRef j);
+ LLVMValueRef j,
+ int word);
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index bf7024c68e4..939b8eb13de 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3120,8 +3120,15 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
LLVMValueRef j = LLVMBuildExtractElement(
ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+ /* This fp16 handling isn't technically correct in general
+ * (the word is hard-coded to 0 here), but it should be correct
+ * for the attributes we are actually going to use. */
+ bool fp16 = instr->dest.ssa.bit_size == 16;
+ int word = fp16 ? 0 : -1;
v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
- ctx->abi->prim_mask, i, j);
+ ctx->abi->prim_mask, i, j, word);
+ if (fp16)
+ v = ac_build_reinterpret(&ctx->ac, v, ctx->ac.f32);
} else {
v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
llvm_chan, attr_number, ctx->abi->prim_mask);
@@ -3134,8 +3141,9 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, "");
}
- return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
- var->data.location_frac);
+ LLVMValueRef ret = ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
+ var->data.location_frac);
+ return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, &instr->dest.ssa));
}
static void visit_intrinsic(struct ac_nir_context *ctx,
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index c46eabf3656..49f8d35dd5f 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2051,7 +2051,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
unsigned attr,
LLVMValueRef interp_param,
LLVMValueRef prim_mask,
- LLVMValueRef result[4])
+ LLVMValueRef result[4],
+ bool fp16)
{
LLVMValueRef attr_number;
unsigned chan;
@@ -2086,7 +2087,10 @@ static void interp_fs_input(struct radv_shader_context *ctx,
result[chan] = ac_build_fs_interp(&ctx->ac,
llvm_chan,
attr_number,
- prim_mask, i, j);
+ prim_mask, i, j,
+ fp16 ? 0 : -1);
+ if (fp16)
+ result[chan] = ac_build_reinterpret(&ctx->ac, result[chan], ctx->ac.f16);
} else {
result[chan] = ac_build_fs_interp_mov(&ctx->ac,
LLVMConstInt(ctx->ac.i32, 2, false),
@@ -2100,7 +2104,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
static void
handle_fs_input_decl(struct radv_shader_context *ctx,
- struct nir_variable *variable)
+ struct nir_variable *variable,
+ uint64_t *fp16_mask)
{
int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
@@ -2110,7 +2115,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
variable->data.driver_location = idx * 4;
mask = ((1ull << attrib_count) - 1) << variable->data.location;
- if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
+ enum glsl_base_type type = glsl_get_base_type(glsl_without_array(variable->type));
+ if (type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_FLOAT16) {
unsigned interp_type;
if (variable->data.sample)
interp_type = INTERP_SAMPLE;
@@ -2120,6 +2126,9 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
interp_type = INTERP_CENTER;
interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
+
+ if (type == GLSL_TYPE_FLOAT16)
+ *fp16_mask |= mask;
}
for (unsigned i = 0; i < attrib_count; ++i)
@@ -2173,8 +2182,9 @@ handle_fs_inputs(struct radv_shader_context *ctx,
{
prepare_interp_optimize(ctx, nir);
+ uint64_t fp16_mask = 0;
nir_foreach_variable(variable, &nir->inputs)
- handle_fs_input_decl(ctx, variable);
+ handle_fs_input_decl(ctx, variable, &fp16_mask);
unsigned index = 0;
@@ -2194,11 +2204,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
interp_param = *inputs;
+ bool fp16 = fp16_mask & (1ull << i);
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
- inputs);
+ inputs, fp16);
if (!interp_param)
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
+ if (fp16)
+ ctx->shader_info->fs.fp16_mask |= 1u << index;
if (i >= VARYING_SLOT_VAR0)
ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
++index;
@@ -2210,7 +2223,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
interp_param = *inputs;
interp_fs_input(ctx, index, interp_param,
- ctx->abi.prim_mask, inputs);
+ ctx->abi.prim_mask, inputs, false);
++index;
}
} else if (i == VARYING_SLOT_POS) {
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index ab56a273a2c..a3260291bce 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3070,13 +3070,15 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
}
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool fp16)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
ps_input_cntl = S_028644_OFFSET(offset);
if (flat_shade)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
+ if (fp16 && !flat_shade)
+ ps_input_cntl |= S_028644_FP16_INTERP_MODE(1);
} else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
@@ -3101,7 +3103,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
if (ps->info.info.ps.prim_id_input) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
++ps_offset;
}
}
@@ -3111,9 +3113,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
ps->info.info.needs_multiview_view_index) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
++ps_offset;
}
@@ -3129,21 +3131,21 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
ps->info.info.ps.num_input_clips_culls > 4) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
++ps_offset;
}
}
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
unsigned vs_offset;
- bool flat_shade;
+ bool flat_shade, fp16;
if (!(ps->info.fs.input_mask & (1u << i)))
continue;
@@ -3155,8 +3157,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
}
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
+ fp16 = !!(ps->info.fs.fp16_mask & (1u << ps_offset));
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, fp16);
++ps_offset;
}
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index b67cd2b4f15..f0e9bc249f9 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -257,6 +257,7 @@ struct radv_shader_variant_info {
unsigned num_interp;
uint32_t input_mask;
uint32_t flat_shaded_mask;
+ uint32_t fp16_mask;
bool can_discard;
bool early_fragment_test;
} fs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index efae02ee91c..c1f82137020 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1751,7 +1751,7 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
return ac_build_fs_interp(&ctx->ac,
LLVMConstInt(ctx->i32, chan, 0),
LLVMConstInt(ctx->i32, attr_index, 0),
- prim_mask, i, j);
+ prim_mask, i, j, -1);
}
return ac_build_fs_interp_mov(&ctx->ac,
LLVMConstInt(ctx->i32, 2, 0), /* P0 */
--
2.20.1
More information about the mesa-dev mailing list