Mesa (master): intel/nir: Lower 8-bit ops to 16-bit in NIR on Gen11+
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Nov 9 19:13:34 UTC 2020
Module: Mesa
Branch: master
Commit: 68092df8d8872bffb07dbd21d1d58733651dc97c
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=68092df8d8872bffb07dbd21d1d58733651dc97c
Author: Jason Ekstrand <jason at jlekstrand.net>
Date: Thu Nov 5 23:19:31 2020 -0600
intel/nir: Lower 8-bit ops to 16-bit in NIR on Gen11+
Intel hardware supports 8-bit arithmetic but it's tricky and annoying:
- Byte operations don't actually execute with a byte type. The
execution type for byte operations is actually word. (I don't know
if this has implications for the HW implementation. Probably?)
- Destinations are required to be strided out to at least the
execution type size. This means that B-type operations always have
a stride of at least 2. This wreaks havoc on the back-end in
multiple ways.
- Thanks to the strided destination, we don't actually save register
space by storing things in bytes. We could, in theory, interleave
two byte values into a single 2B-strided register but that's both a
pain for RA and would lead to piles of false dependencies pre-Gen12
and on Gen12+, we'd need some significant improvements to the SWSB
pass.
- Also thanks to the strided destination, all byte writes are treated
as partial writes by the back-end and we don't know how to copy-prop
them.
- On Gen11, they added a new hardware restriction that byte types
aren't allowed in the 2nd and 3rd sources of instructions. This
means that we have to emit B->W conversions all over to resolve
things. If we emit said conversions in NIR, instead, there's a
chance NIR can get rid of some of them for us.
We can get rid of a lot of this pain by just asking NIR to get rid of
8-bit arithmetic for us. It may lead to a few more conversions in some
cases but having back-end copy-prop actually work is probably a bigger
bonus. There is still a bit we have to handle in the back-end. In
particular, basic MOVs and conversions because 8-bit load/store ops
still require 8-bit types.
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7482>
---
src/intel/compiler/brw_compiler.c | 4 +++-
src/intel/compiler/brw_fs_builder.h | 33 +++++++++------------------------
src/intel/compiler/brw_fs_nir.cpp | 13 +++----------
src/intel/compiler/brw_nir.c | 25 +++++++++++++++++++++++++
4 files changed, 40 insertions(+), 35 deletions(-)
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 9b2a0427144..933b00440dd 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -49,7 +49,6 @@
.vertex_id_zero_based = true, \
.lower_base_vertex = true, \
.use_scoped_barrier = true, \
- .support_8bit_alu = true, \
.support_16bit_alu = true, \
.lower_uniforms_to_ubo = true
@@ -187,6 +186,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
nir_options->lower_int64_options = int64_options;
nir_options->lower_doubles_options = fp64_options;
+ /* Starting with Gen11, we lower away 8-bit arithmetic */
+ nir_options->support_8bit_alu = devinfo->gen < 11;
+
nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
compiler->glsl_compiler_options[i].NirOptions = nir_options;
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index bdd10638389..368285a3cbe 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -304,11 +304,11 @@ namespace brw {
case SHADER_OPCODE_INT_REMAINDER:
return emit(instruction(opcode, dispatch_width(), dst,
fix_math_operand(src0),
- fix_math_operand(fix_byte_src(src1))));
+ fix_math_operand(src1)));
default:
return emit(instruction(opcode, dispatch_width(), dst,
- src0, fix_byte_src(src1)));
+ src0, src1));
}
}
@@ -327,12 +327,12 @@ namespace brw {
case BRW_OPCODE_LRP:
return emit(instruction(opcode, dispatch_width(), dst,
fix_3src_operand(src0),
- fix_3src_operand(fix_byte_src(src1)),
- fix_3src_operand(fix_byte_src(src2))));
+ fix_3src_operand(src1),
+ fix_3src_operand(src2)));
default:
return emit(instruction(opcode, dispatch_width(), dst,
- src0, fix_byte_src(src1), fix_byte_src(src2)));
+ src0, src1, src2));
}
}
@@ -394,8 +394,8 @@ namespace brw {
/* In some cases we can't have bytes as operand for src1, so use the
* same type for both operand.
*/
- return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)),
- fix_unsigned_negate(fix_byte_src(src1))));
+ return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
}
/**
@@ -659,8 +659,8 @@ namespace brw {
emit(BRW_OPCODE_CSEL,
retype(dst, BRW_REGISTER_TYPE_F),
retype(src0, BRW_REGISTER_TYPE_F),
- retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F),
- fix_byte_src(src2)));
+ retype(src1, BRW_REGISTER_TYPE_F),
+ src2));
}
/**
@@ -721,21 +721,6 @@ namespace brw {
backend_shader *shader;
- /**
- * Byte sized operands are not supported for src1 on Gen11+.
- */
- src_reg
- fix_byte_src(const src_reg &src) const
- {
- if (shader->devinfo->gen < 11 || type_sz(src.type) != 1)
- return src;
-
- dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ?
- BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D);
- MOV(temp, src);
- return src_reg(temp);
- }
-
private:
/**
* Workaround for negation of UD registers. See comment in
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 38d7540fce0..56a0b2942bf 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1424,18 +1424,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
case nir_op_ine32: {
fs_reg dest = result;
- /* On Gen11 we have an additional issue being that src1 cannot be a byte
- * type. So we convert both operands for the comparison.
- */
- fs_reg temp_op[2];
- temp_op[0] = bld.fix_byte_src(op[0]);
- temp_op[1] = bld.fix_byte_src(op[1]);
-
- const uint32_t bit_size = type_sz(temp_op[0].type) * 8;
+ const uint32_t bit_size = type_sz(op[0].type) * 8;
if (bit_size != 32)
- dest = bld.vgrf(temp_op[0].type, 1);
+ dest = bld.vgrf(op[0].type, 1);
- bld.CMP(dest, temp_op[0], temp_op[1],
+ bld.CMP(dest, op[0], op[1],
brw_cmod_for_nir_comparison(instr->op));
if (bit_size > 32) {
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 282eac338fa..8c45f762c13 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -667,6 +667,15 @@ lower_bit_size_callback(const nir_instr *instr, UNUSED void *data)
case nir_op_fcos:
return devinfo->gen < 9 ? 32 : 0;
default:
+ if (devinfo->gen >= 11) {
+ if (nir_op_infos[alu->op].num_inputs >= 2 &&
+ alu->dest.dest.ssa.bit_size == 8)
+ return 16;
+
+ if (nir_alu_instr_is_comparison(alu) &&
+ alu->src[0].src.ssa->bit_size == 8)
+ return 16;
+ }
return 0;
}
break;
@@ -675,6 +684,22 @@ lower_bit_size_callback(const nir_instr *instr, UNUSED void *data)
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_read_first_invocation:
+ case nir_intrinsic_vote_feq:
+ case nir_intrinsic_vote_ieq:
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_shuffle_xor:
+ case nir_intrinsic_shuffle_up:
+ case nir_intrinsic_shuffle_down:
+ case nir_intrinsic_quad_broadcast:
+ case nir_intrinsic_quad_swap_horizontal:
+ case nir_intrinsic_quad_swap_vertical:
+ case nir_intrinsic_quad_swap_diagonal:
+ if (intrin->src[0].ssa->bit_size == 8 && devinfo->gen >= 11)
+ return 16;
+ return 0;
+
case nir_intrinsic_reduce:
case nir_intrinsic_inclusive_scan:
case nir_intrinsic_exclusive_scan:
More information about the mesa-commit
mailing list