Mesa (main): radv,aco: Don't lower and vectorize 16bit iabs.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jul 20 15:08:39 UTC 2022
Module: Mesa
Branch: main
Commit: b96126ee959df56d21783fea9e680346086860f5
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b96126ee959df56d21783fea9e680346086860f5
Author: Georg Lehmann <dadschoorse at gmail.com>
Date: Sat Jul 9 13:32:28 2022 +0200
radv,aco: Don't lower and vectorize 16bit iabs.
Signed-off-by: Georg Lehmann <dadschoorse at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17440>
---
src/amd/compiler/aco_instruction_selection.cpp | 19 +++++++++++++++++++
src/amd/compiler/aco_instruction_selection_setup.cpp | 1 +
src/amd/vulkan/radv_pipeline.c | 3 ++-
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index e3de4efb805..69b82d3e2c8 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1495,12 +1495,31 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
break;
}
case nir_op_iabs: {
+ if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
+ Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
+
+ unsigned opsel_lo = (instr->src[0].swizzle[0] & 1) << 1;
+ unsigned opsel_hi = ((instr->src[0].swizzle[1] & 1) << 1) | 1;
+
+ Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
+ src, opsel_lo, opsel_hi);
+ bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
+ break;
+ }
Temp src = get_alu_src(ctx, instr->src[0]);
if (dst.regClass() == s1) {
bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src);
} else if (dst.regClass() == v1) {
bld.vop2(aco_opcode::v_max_i32, Definition(dst), src,
bld.vsub32(bld.def(v1), Operand::zero(), src));
+ } else if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
+ bld.vop3(
+ aco_opcode::v_max_i16_e64, Definition(dst), src,
+ bld.vop3(aco_opcode::v_sub_u16_e64, Definition(bld.tmp(v2b)), Operand::zero(2), src));
+ } else if (dst.regClass() == v2b) {
+ src = as_vgpr(ctx, src);
+ bld.vop2(aco_opcode::v_max_i16, Definition(dst), src,
+ bld.vop2(aco_opcode::v_sub_u16, Definition(bld.tmp(v2b)), Operand::zero(2), src));
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index a33776f9897..44cfef4022d 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -556,6 +556,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_b2f16:
case nir_op_b2f32:
case nir_op_mov: break;
+ case nir_op_iabs:
case nir_op_iadd:
case nir_op_iadd_sat:
case nir_op_uadd_sat:
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index c9a2cd88f58..bf7a587d6f4 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3947,13 +3947,13 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
unsigned bit_size = alu->dest.dest.ssa.bit_size;
switch (alu->op) {
- case nir_op_iabs:
case nir_op_bitfield_select:
case nir_op_imul_high:
case nir_op_umul_high:
case nir_op_ineg:
case nir_op_isign:
return 32;
+ case nir_op_iabs:
case nir_op_imax:
case nir_op_umax:
case nir_op_imin:
@@ -4026,6 +4026,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
case nir_op_fsat:
case nir_op_fmin:
case nir_op_fmax:
+ case nir_op_iabs:
case nir_op_iadd:
case nir_op_iadd_sat:
case nir_op_uadd_sat:
More information about the mesa-commit mailing list