[Mesa-dev] [PATCH 20/26] i965: Add an implemnetation of nir_op_fquantize2f16
Jason Ekstrand
jason at jlekstrand.net
Fri Mar 25 23:12:34 UTC 2016
---
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 25 +++++++++++++++++++++++++
2 files changed, 53 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7aff042..5255434 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1016,6 +1016,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
+ case nir_op_fquantize2f16: {
+ fs_reg tmp16 = bld.vgrf(BRW_REGISTER_TYPE_D);
+ fs_reg tmp32 = bld.vgrf(BRW_REGISTER_TYPE_F);
+ fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F);
+
+ /* The destination stride must be at least as big as the source stride. */
+ tmp16.type = BRW_REGISTER_TYPE_W;
+ tmp16.stride = 2;
+
+ /* Check for denormal */
+ fs_reg abs_src0 = op[0];
+ abs_src0.abs = true;
+ bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)),
+ BRW_CONDITIONAL_L);
+ /* Get the appropriately signed zero */
+ bld.AND(retype(zero, BRW_REGISTER_TYPE_UD),
+ retype(op[0], BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x80000000));
+ /* Do the actual F32 -> F16 -> F32 conversion */
+ bld.emit(BRW_OPCODE_F32TO16, tmp16, op[0]);
+ bld.emit(BRW_OPCODE_F16TO32, tmp32, tmp16);
+ /* Select that or zero based on normal status */
+ inst = bld.SEL(result, zero, tmp32);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->saturate = instr->dest.saturate;
+ break;
+ }
+
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index c18694f..2e714fa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1213,6 +1213,31 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
+ case nir_op_fquantize2f16: {
+ /* See also vec4_visitor::emit_pack_half_2x16() */
+ src_reg tmp16 = src_reg(this, glsl_type::uvec4_type);
+ src_reg tmp32 = src_reg(this, glsl_type::vec4_type);
+ src_reg zero = src_reg(this, glsl_type::vec4_type);
+
+ /* Check for denormal */
+ src_reg abs_src0 = op[0];
+ abs_src0.abs = true;
+ emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)),
+ BRW_CONDITIONAL_L));
+ /* Get the appropriately signed zero */
+ emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD),
+ retype(op[0], BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x80000000)));
+ /* Do the actual F32 -> F16 -> F32 conversion */
+ emit(F32TO16(dst_reg(tmp16), op[0]));
+ emit(F16TO32(dst_reg(tmp32), tmp16));
+ /* Select that or zero based on normal status */
+ inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->saturate = instr->dest.saturate;
+ break;
+ }
+
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list