[Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations
Chad Versace
chad.versace at linux.intel.com
Thu Jan 10 00:10:28 PST 2013
Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
---
src/mesa/drivers/dri/i965/brw_defines.h | 1 +
src/mesa/drivers/dri/i965/brw_fs.h | 7 ++
.../dri/i965/brw_fs_channel_expressions.cpp | 29 +++++++-
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 39 ++++++++++-
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 78 +++++++++++++++++++++-
5 files changed, 149 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 22d3e98..1c43d68 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -713,6 +713,7 @@ enum opcode {
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_GLOBAL_OFFSET,
+ FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index bcf38f3..59aa28d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,10 @@ public:
fs_reg fix_math_operand(fs_reg src);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+ void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
+ void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
+ void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
+
void emit_minmax(uint32_t conditionalmod, fs_reg dst,
fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
@@ -541,6 +545,9 @@ private:
struct brw_reg src,
struct brw_reg offset);
void generate_discard_jump(fs_inst *inst);
+ void generate_unpack_half_2x16_split_y(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 58521ee..7081511 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -76,8 +76,21 @@ channel_expressions_predicate(ir_instruction *ir)
return false;
for (i = 0; i < expr->get_num_operands(); i++) {
- if (expr->operands[i]->type->is_vector())
- return true;
+ if (expr->operands[i]->type->is_vector()) {
+ /* The half-float pack/unpack expressions below are not expected to
+ * show up here with a vector operand; trip a descriptive assertion in
+ * debug builds if one does. The expression is still reported as
+ * needing channel splitting, exactly like any other vector operand.
+ */
+ switch (expr->operation) {
+ case ir_binop_pack_half_2x16_split:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ assert(!"not reached: half-float pack/unpack expression has a vector operand");
+ break;
+ default:
+ break;
+ }
+
+ return true;
+ }
}
return false;
@@ -342,9 +355,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
assert(!"not yet supported");
break;
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
case ir_quadop_vector:
assert(!"should have been lowered");
break;
+
+ /* These expressions operate on scalars only. The '!' must sit outside
+ * the string literal: a bare string literal is a non-null pointer, so
+ * assert("...") would always pass and never flag this path.
+ */
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ case ir_binop_pack_half_2x16_split:
+ assert(!"not reached: expression operates on scalars only");
+ break;
}
ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 63f09fe..46e2409 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -920,6 +920,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
}
void
+fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
+                                                struct brw_reg dst,
+                                                struct brw_reg src)
+{
+   /* Generate the f16to32 conversion of the "y" half of a packed source
+    * and write it to dst.  Requires gen >= 7 and a UD-typed source; `inst`
+    * is not referenced in this function.
+    */
+   assert(intel->gen >= 7);
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+
+   /* Each channel of src looks like the input of unpackHalf2x16:
+    *
+    *       w     z     y        x
+    *    |undef|undef|undef|0xhhhhllll|
+    *
+    * Only the "hhhh" bits are wanted, which means reading the register at
+    * a 16-bit subregister offset.  That is done by shrinking the source
+    * type from UD to UW, moving the subregister number forward by two,
+    * and doubling the strides to make up for the halved element size.
+    * Strides of zero are left untouched.
+    */
+   src.type = BRW_REGISTER_TYPE_UW;
+   src.subnr += 2;
+   if (src.hstride > 0)
+      src.hstride += 1;
+   if (src.vstride > 0)
+      src.vstride += 1;
+
+   brw_F16TO32(p, dst, src);
+}
+
+void
fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
@@ -1079,7 +1107,12 @@ fs_generator::generate_code(exec_list *instructions)
case BRW_OPCODE_SHL:
brw_SHL(p, dst, src[0], src[1]);
break;
-
+ case BRW_OPCODE_F32TO16:
+ brw_F32TO16(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_F16TO32:
+ brw_F16TO32(p, dst, src[0]);
+ break;
case BRW_OPCODE_CMP:
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
break;
@@ -1226,6 +1259,10 @@ fs_generator::generate_code(exec_list *instructions)
generate_set_global_offset(inst, dst, src[0], src[1]);
break;
+ case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+ generate_unpack_half_2x16_split_y(inst, dst, src[0]);
+ break;
+
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index e70d6bf..563d1d5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -536,7 +536,20 @@ fs_visitor::visit(ir_expression *ir)
BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
this->result, op[0], op[1]);
break;
-
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_pack_half_2x16:
+ assert(!"not reached: should be handled by lower_packing_builtins");
+ break;
+ case ir_unop_unpack_half_2x16_split_x:
+ emit_unpack_half_2x16_split_x(this->result, op[0]);
+ break;
+ case ir_unop_unpack_half_2x16_split_y:
+ emit_unpack_half_2x16_split_y(this->result, op[0]);
+ break;
case ir_binop_pow:
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
@@ -564,7 +577,9 @@ fs_visitor::visit(ir_expression *ir)
else
inst = emit(SHR(this->result, op[0], op[1]));
break;
-
+ case ir_binop_pack_half_2x16_split:
+ emit_pack_half_2x16_split(this->result, op[0], op[1]);
+ break;
case ir_binop_ubo_load:
/* This IR node takes a constant uniform block and a constant or
* variable byte offset within the block and loads a vector from that.
@@ -2259,6 +2274,65 @@ fs_visitor::emit_fb_writes()
}
void
+fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
+{
+   /* Emit instructions computing
+    *
+    *    dst = (f32to16(y) << 16) | (f32to16(x) & 0xffff)
+    *
+    * where dst is a uint register and x, y are float registers.  Only
+    * meant for gen >= 7; older generations are expected to have had the
+    * expression lowered before reaching this point.
+    */
+   if (intel->gen < 7)
+      assert(!"packHalf2x16 should be handled by lower_packing_builtins");
+
+   /* uint dst; */
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+
+   /* float x; */
+   assert(x.type == BRW_REGISTER_TYPE_F);
+
+   /* float y; */
+   assert(y.type == BRW_REGISTER_TYPE_F);
+
+   /* uint tmp; */
+   fs_reg tmp(this, glsl_type::uint_type);
+
+   /* dst = f32to16(x); */
+   emit(BRW_OPCODE_F32TO16, dst, x);
+
+   /* dst &= 0xffff;
+    *
+    * The Ivybridge PRM describes f32to16 as storing its 16-bit result in
+    * the lower word of each destination channel and leaving the upper
+    * word unmodified.  Clear the upper word explicitly so that stale bits
+    * cannot leak into the packed value when tmp is OR'ed in below.
+    *
+    * NOTE(review): the PRM also appears to require a word-typed
+    * destination for f32to16; confirm that a UD destination is
+    * acceptable here.
+    */
+   emit(BRW_OPCODE_AND, dst, dst, fs_reg(0xffffu));
+
+   /* tmp = f32to16(y); */
+   emit(BRW_OPCODE_F32TO16, tmp, y);
+
+   /* tmp <<= 16;  (the shift also discards tmp's previous upper word) */
+   emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
+
+   /* dst |= tmp; */
+   emit(BRW_OPCODE_OR, dst, dst, tmp);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
+{
+   /* Emit a single F16TO32 that computes dst = f16to32(src0).
+    *
+    * Only meant for gen >= 7; on older generations the expression is
+    * expected to have been lowered already.
+    */
+   if (intel->gen < 7) {
+      assert(!"unpackHalf2x16 should be lowered");
+   }
+
+   /* Operand contract: dst is a float register, src0 a uint register. */
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+   emit(BRW_OPCODE_F16TO32, dst, src0);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0)
+{
+   /* Emit the FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y virtual opcode with dst
+    * as destination and src0 as its only source.
+    *
+    * Only meant for gen >= 7; on older generations the expression is
+    * expected to have been lowered already.
+    */
+   if (intel->gen < 7) {
+      assert(!"unpackHalf2x16 should be lowered");
+   }
+
+   /* Operand contract: dst is a float register, src0 a uint register. */
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+   emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0);
+}
+
+void
fs_visitor::resolve_ud_negate(fs_reg *reg)
{
if (reg->type != BRW_REGISTER_TYPE_UD ||
--
1.8.1
More information about the mesa-dev
mailing list