Mesa (main): aco: add instr_is_16bit() helper function

Mon Aug 23 10:48:49 UTC 2021

Module: Mesa
Branch: main
Commit: e11b23f7cd3d848ea278d09f285a186371c70b44
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e11b23f7cd3d848ea278d09f285a186371c70b44

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Wed Aug 18 18:56:59 2021 +0200

aco: add instr_is_16bit() helper function

to indicate whether an instruction writes only a partial register.

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12364>

---

 src/amd/compiler/aco_ir.cpp | 59 +++++++++++++++++++++++++++++++++++++++++++++
 src/amd/compiler/aco_ir.h   |  1 +
 2 files changed, 60 insertions(+)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 61cf3c2c43c..a276c2e7527 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -419,6 +419,65 @@ can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high)
    }
 }
 
+bool
+instr_is_16bit(chip_class chip, aco_opcode op)
+{
+   /* True if op writes only part of its destination register, which happens on GFX9+ only. */
+   if (chip < GFX9)
+      return false;
+
+   switch (op) {
+   /* VOP3 */
+   case aco_opcode::v_mad_f16:
+   case aco_opcode::v_mad_u16:
+   case aco_opcode::v_mad_i16:
+   case aco_opcode::v_fma_f16:
+   case aco_opcode::v_div_fixup_f16:
+   case aco_opcode::v_interp_p2_f16:
+   case aco_opcode::v_fma_mixlo_f16:
+   /* VOP2 */
+   case aco_opcode::v_mac_f16:
+   case aco_opcode::v_madak_f16:
+   case aco_opcode::v_madmk_f16: return chip >= GFX9; /* always true after the guard above */
+   case aco_opcode::v_add_f16:
+   case aco_opcode::v_sub_f16:
+   case aco_opcode::v_subrev_f16:
+   case aco_opcode::v_mul_f16:
+   case aco_opcode::v_max_f16:
+   case aco_opcode::v_min_f16:
+   case aco_opcode::v_ldexp_f16:
+   case aco_opcode::v_fmac_f16:
+   case aco_opcode::v_fmamk_f16:
+   case aco_opcode::v_fmaak_f16:
+   /* VOP1 */
+   case aco_opcode::v_cvt_f16_f32:
+   case aco_opcode::v_cvt_f16_u16:
+   case aco_opcode::v_cvt_f16_i16:
+   case aco_opcode::v_rcp_f16:
+   case aco_opcode::v_sqrt_f16:
+   case aco_opcode::v_rsq_f16:
+   case aco_opcode::v_log_f16:
+   case aco_opcode::v_exp_f16:
+   case aco_opcode::v_frexp_mant_f16:
+   case aco_opcode::v_frexp_exp_i16_f16:
+   case aco_opcode::v_floor_f16:
+   case aco_opcode::v_ceil_f16:
+   case aco_opcode::v_trunc_f16:
+   case aco_opcode::v_rndne_f16:
+   case aco_opcode::v_fract_f16:
+   case aco_opcode::v_sin_f16:
+   case aco_opcode::v_cos_f16: return chip >= GFX10;
+   // TODO: confirm whether these write 16 or 32 bits on GFX10+
+   // case aco_opcode::v_cvt_u16_f16:
+   // case aco_opcode::v_cvt_i16_f16:
+   // case aco_opcode::p_cvt_f16_f32_rtne:
+   // case aco_opcode::v_cvt_norm_i16_f16:
+   // case aco_opcode::v_cvt_norm_u16_f16:
+   /* On GFX10+, every instruction that supports opsel preserves the high bits. */
+   default: return chip >= GFX10 && can_use_opsel(chip, op, -1, false);
+   }
+}
+
 uint32_t
 get_reduction_identity(ReduceOp op, unsigned idx)
 {
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 0c5c8b767aa..0fa4fc824d5 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1729,6 +1729,7 @@ memory_sync_info get_sync_info(const Instruction* instr);
 bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
 
 bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
+bool instr_is_16bit(chip_class chip, aco_opcode op);
 bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
 bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra);
 /* updates "instr" and returns the old instruction (or NULL if no update was needed) */