Mesa (master): i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes.
Kenneth Graunke
kwg at kemper.freedesktop.org
Thu Feb 20 23:55:55 UTC 2014
Module: Mesa
Branch: master
Commit: 808952a09543b60e59c5ad9238d8403fa9f1f15b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=808952a09543b60e59c5ad9238d8403fa9f1f15b
Author: Kenneth Graunke <kenneth at whitecape.org>
Date: Wed Jan 29 13:45:27 2014 -0800
i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes.
I'd neglected to port these to Broadwell. Most of this code is copied
and pasted from Gen7, but instead of using F32TO16/F16TO32, we just
use MOV with HF register types.
Fixes fs-packHalf2x16 and fs-unpackHalf2x16 tests (both the ARB
extension and ES 3.0 variants).
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
Reviewed-by: Eric Anholt <eric at anholt.net>
---
src/mesa/drivers/dri/i965/brw_fs.h | 7 +++
src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 76 ++++++++++++++++++++++-
2 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 5058c48..b1e38b6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -731,6 +731,13 @@ private:
void generate_set_simd4x2_offset(fs_inst *ir,
struct brw_reg dst,
struct brw_reg offset);
+ void generate_pack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg x,
+ struct brw_reg y);
+ void generate_unpack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
void generate_discard_jump(fs_inst *ir);
void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index 0e1214d..86e772c 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -757,6 +757,78 @@ gen8_fs_generator::generate_set_sample_id(fs_inst *ir,
default_state.exec_size = save_exec_size;
}
+/**
+ * Retype a UD register as HF, doubling every nonzero stride to compensate
+ * for halving the data type width (a zero stride is left unchanged).
+ */
+static struct brw_reg
+ud_reg_to_hf(struct brw_reg r)
+{
+ assert(r.type == BRW_REGISTER_TYPE_UD);
+ r.type = BRW_REGISTER_TYPE_HF;
+
+ /* The BRW_*_STRIDE enums are defined so that incrementing the field
+ * doubles the real stride.
+ */
+ if (r.hstride != 0)
+ ++r.hstride;
+ if (r.vstride != 0)
+ ++r.vstride;
+
+ return r;
+}
+
+void
+gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg x,
+ struct brw_reg y)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_UD);
+ assert(x.type == BRW_REGISTER_TYPE_F);
+ assert(y.type == BRW_REGISTER_TYPE_F);
+
+ struct brw_reg dst_hf = ud_reg_to_hf(dst);
+
+ /* Move y through the HF view of dst, giving each 32-bit channel of dst
+ * the form below, where "." means unchanged:
+ * 0x....hhhh
+ */
+ MOV(dst_hf, y);
+
+ /* Shift y's half up into the high word. Now the form:
+ * 0xhhhh0000
+ */
+ SHL(dst, dst, brw_imm_ud(16u));
+
+ /* And finally, move x into the low word: packHalf2x16's output form.
+ * 0xhhhhllll
+ */
+ MOV(dst_hf, x);
+}
+
+void
+gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_F);
+ assert(src.type == BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg src_hf = ud_reg_to_hf(src);
+
+ /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
+ * The X case reads the lower word as-is. For the Y case, we want only the
+ * upper word, so the subregister offset is advanced by 16 bits (subnr += 2).
+ */
+ assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+ inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+ if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+ src_hf.subnr += 2;
+
+ MOV(dst, src_hf);
+}
+
void
gen8_fs_generator::generate_code(exec_list *instructions)
{
@@ -1140,12 +1212,12 @@ gen8_fs_generator::generate_code(exec_list *instructions)
break;
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
- assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
+ generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
break;
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
- assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
+ generate_unpack_half_2x16_split(ir, dst, src[0]);
break;
case FS_OPCODE_PLACEHOLDER_HALT:
More information about the mesa-commit
mailing list