Mesa (master): aco: generalize subdword constant copy lowering
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Jan 21 11:13:24 UTC 2021
Module: Mesa
Branch: master
Commit: c0cec3a29bb0f77f16645561e70d2a7eca12be6a
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c0cec3a29bb0f77f16645561e70d2a7eca12be6a
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Wed Dec 30 15:06:04 2020 +0000
aco: generalize subdword constant copy lowering
This allows sub-register constants to be propagated and emitted
on all hardware generations.
Also fixes GFX8 constant emission to not use SDWA.
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8260>
---
src/amd/compiler/aco_lower_to_hw_instr.cpp | 82 ++++++++++++++++--------------
1 file changed, 43 insertions(+), 39 deletions(-)
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index a4e2d5b6404..56da58bd333 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1023,49 +1023,53 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
}
} else if (dst.regClass() == v1) {
bld.vop1(aco_opcode::v_mov_b32, dst, op);
- } else if (dst.regClass() == v1b) {
- assert(ctx->program->chip_class >= GFX8);
- uint8_t val = op.constantValue();
- Operand op32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
- aco_ptr<SDWA_instruction> sdwa;
- if (op32.isLiteral()) {
- uint32_t a = (uint32_t)int8_mul_table[val * 2];
- uint32_t b = (uint32_t)int8_mul_table[val * 2 + 1];
- bld.vop2_sdwa(aco_opcode::v_mul_u32_u24, dst,
- Operand(a | (a & 0x80u ? 0xffffff00u : 0x0u)),
- Operand(b | (b & 0x80u ? 0xffffff00u : 0x0u)));
- } else {
- bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op32);
- }
- } else if (dst.regClass() == v2b && op.isConstant() && !op.isLiteral()) {
- assert(ctx->program->chip_class >= GFX8);
- if (op.constantValue() >= 0xfff0 || op.constantValue() <= 64) {
- /* use v_mov_b32 to avoid possible issues with denormal flushing or
- * NaN. v_add_f16 is still needed for float constants. */
- uint32_t val32 = (int32_t)(int16_t)op.constantValue();
- bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand(val32));
+ } else {
+ assert(dst.regClass() == v1b || dst.regClass() == v2b);
+
+ if (dst.regClass() == v1b && ctx->program->chip_class >= GFX9) {
+ uint8_t val = op.constantValue();
+ Operand op32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
+ if (op32.isLiteral()) {
+ uint32_t a = (uint32_t)int8_mul_table[val * 2];
+ uint32_t b = (uint32_t)int8_mul_table[val * 2 + 1];
+ bld.vop2_sdwa(aco_opcode::v_mul_u32_u24, dst,
+ Operand(a | (a & 0x80u ? 0xffffff00u : 0x0u)),
+ Operand(b | (b & 0x80u ? 0xffffff00u : 0x0u)));
+ } else {
+ bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op32);
+ }
+ } else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX9 && !op.isLiteral()) {
+ if (op.constantValue() >= 0xfff0 || op.constantValue() <= 64) {
+ /* use v_mov_b32 to avoid possible issues with denormal flushing or
+ * NaN. v_add_f16 is still needed for float constants. */
+ uint32_t val32 = (int32_t)(int16_t)op.constantValue();
+ bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand(val32));
+ } else {
+ bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u));
+ }
+ } else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX10 &&
+ (ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) {
+ if (dst.physReg().byte() == 2) {
+ Operand def_lo(dst.physReg().advance(-2), v2b);
+ Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
+ static_cast<VOP3A_instruction*>(instr)->opsel = 0;
+ } else {
+ assert(dst.physReg().byte() == 0);
+ Operand def_hi(dst.physReg().advance(2), v2b);
+ Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
+ static_cast<VOP3A_instruction*>(instr)->opsel = 2;
+ }
} else {
- bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u));
- }
- } else if (dst.regClass() == v2b && op.isLiteral()) {
- if (ctx->program->chip_class < GFX10 || !(ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) {
- unsigned offset = dst.physReg().byte() * 8u;
+ uint32_t offset = dst.physReg().byte() * 8u;
+ uint32_t mask = ((1u << (dst.bytes() * 8)) - 1) << offset;
+ uint32_t val = (op.constantValue() << offset) & mask;
dst = Definition(PhysReg(dst.physReg().reg()), v1);
Operand def_op(dst.physReg(), v1);
- bld.vop2(aco_opcode::v_and_b32, dst, Operand(~(0xffffu << offset)), def_op);
- bld.vop2(aco_opcode::v_or_b32, dst, Operand(op.constantValue() << offset), def_op);
- } else if (dst.physReg().byte() == 2) {
- Operand def_lo(dst.physReg().advance(-2), v2b);
- Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
- static_cast<VOP3A_instruction*>(instr)->opsel = 0;
- } else {
- assert(dst.physReg().byte() == 0);
- Operand def_hi(dst.physReg().advance(2), v2b);
- Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
- static_cast<VOP3A_instruction*>(instr)->opsel = 2;
+ if (val != mask)
+ bld.vop2(aco_opcode::v_and_b32, dst, Operand(~mask), def_op);
+ if (val != 0)
+ bld.vop2(aco_opcode::v_or_b32, dst, Operand(val), def_op);
}
- } else {
- unreachable("unsupported copy");
}
}
More information about the mesa-commit
mailing list