Mesa (main): intel/fs: Emit better code for u2u of extract
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Aug 18 22:41:19 UTC 2021
Module: Mesa
Branch: main
Commit: 7c83aa0518988a3b2bc2bc6bf74d808db86982d1
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c83aa0518988a3b2bc2bc6bf74d808db86982d1
Author: Ian Romanick <ian.d.romanick at intel.com>
Date: Tue Jan 26 19:52:50 2021 -0800
intel/fs: Emit better code for u2u of extract
Emitting the instructions one by one results in two MOV instructions
that won't be propagated. By handling both instructions at once, a
single MOV is emitted. For example, on Ice Lake this helps
dEQP-VK.spirv_assembly.type.vec3.i8.bitwise_xor_frag:
Before: SIMD8 shader: 49 instructions. 1 loops. 4044 cycles. 0:0 spills:fills, 5 sends
After:  SIMD8 shader: 41 instructions. 1 loops. 3804 cycles. 0:0 spills:fills, 5 sends
Without "intel/fs: Allow copy propagation between MOVs of mixed sizes,"
the improvement is still 8 instructions, but there are more instructions
to begin with:
Before: SIMD8 shader: 52 instructions. 1 loops. 4164 cycles. 0:0 spills:fills, 5 sends
After:  SIMD8 shader: 44 instructions. 1 loops. 3944 cycles. 0:0 spills:fills, 5 sends
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9025>
---
src/intel/compiler/brw_fs_nir.cpp | 42 +++++++++++++++++++++++++++++++++++----
1 file changed, 38 insertions(+), 4 deletions(-)
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index cf71e229806..2bc8ea4e44e 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1102,13 +1102,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
case nir_op_f2i32:
case nir_op_f2u32:
case nir_op_i2f16:
- case nir_op_i2i16:
case nir_op_u2f16:
- case nir_op_u2u16:
case nir_op_f2i16:
case nir_op_f2u16:
- case nir_op_i2i8:
- case nir_op_u2u8:
case nir_op_f2i8:
case nir_op_f2u8:
if (result.type == BRW_REGISTER_TYPE_B ||
@@ -1124,6 +1120,44 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
inst = bld.MOV(result, op[0]);
break;
+ case nir_op_i2i8:
+ case nir_op_u2u8:
+ assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
+ FALLTHROUGH;
+ case nir_op_i2i16:
+ case nir_op_u2u16: {
+ /* Emit better code for u2u8(extract_u8(a, b)) and similar patterns.
+ * Emitting the instructions one by one results in two MOV instructions
+ * that won't be propagated. By handling both instructions here, a
+ * single MOV is emitted.
+ */
+ nir_alu_instr *extract_instr = nir_src_as_alu_instr(instr->src[0].src);
+ if (extract_instr != NULL) {
+ if (extract_instr->op == nir_op_extract_u8 ||
+ extract_instr->op == nir_op_extract_i8) {
+ prepare_alu_destination_and_sources(bld, extract_instr, op, false);
+
+ const unsigned byte = nir_src_as_uint(extract_instr->src[1].src);
+ const brw_reg_type type =
+ brw_int_type(1, extract_instr->op == nir_op_extract_i8);
+
+ op[0] = subscript(op[0], type, byte);
+ } else if (extract_instr->op == nir_op_extract_u16 ||
+ extract_instr->op == nir_op_extract_i16) {
+ prepare_alu_destination_and_sources(bld, extract_instr, op, false);
+
+ const unsigned word = nir_src_as_uint(extract_instr->src[1].src);
+ const brw_reg_type type =
+ brw_int_type(2, extract_instr->op == nir_op_extract_i16);
+
+ op[0] = subscript(op[0], type, word);
+ }
+ }
+
+ inst = bld.MOV(result, op[0]);
+ break;
+ }
+
case nir_op_fsat:
inst = bld.MOV(result, op[0]);
inst->saturate = true;
More information about the mesa-commit mailing list