[Mesa-dev] [PATCH (gles3) 20/20] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (v2)
Chad Versace
chad.versace at linux.intel.com
Mon Jan 21 00:49:32 PST 2013
v2: Remove lewd comment [for idr].
Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
---
src/mesa/drivers/dri/i965/brw_defines.h | 1 +
src/mesa/drivers/dri/i965/brw_fs.h | 7 ++
.../dri/i965/brw_fs_channel_expressions.cpp | 12 ++++
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 39 +++++++++-
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 82 +++++++++++++++++++++-
5 files changed, 138 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e2f1e65..e3d297c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -726,6 +726,7 @@ enum opcode {
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_GLOBAL_OFFSET,
+ FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b47b0d0..49e2ed0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,10 @@ public:
fs_reg fix_math_operand(fs_reg src);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+ void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
+ void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
+ void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
+
void emit_minmax(uint32_t conditionalmod, fs_reg dst,
fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
@@ -541,6 +545,9 @@ private:
struct brw_reg src,
struct brw_reg offset);
void generate_discard_jump(fs_inst *inst);
+ void generate_unpack_half_2x16_split_y(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 58521ee..e19da51 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -342,9 +342,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
assert(!"not yet supported");
break;
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
case ir_quadop_vector:
assert(!"should have been lowered");
break;
+
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ case ir_binop_pack_half_2x16_split:
+ assert(!"not reached: expression operates on scalars only");
+ break;
}
ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 324e665..0ff296c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -923,6 +923,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
}
void
+fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(intel->gen >= 7);
+
+ /* src has the form of unpackHalf2x16's input:
+ *
+ * w z y x
+ * |undef|undef|undef|0xhhhhllll|
+ *
+ * We want only the "hhhh" bits, so we read src at a 16-bit subregister
+ * offset (subnr += 2): retype it from UD to UW and compensate by doubling
+ * each nonzero stride. The ++ on vstride/hstride is that doubling, which
+ * presumably relies on a log2-style stride encoding -- TODO confirm.
+ */
+ assert(src.type == BRW_REGISTER_TYPE_UD);
+ src.type = BRW_REGISTER_TYPE_UW;
+ if (src.vstride > 0)
+ ++src.vstride;
+ if (src.hstride > 0)
+ ++src.hstride;
+ src.subnr += 2;
+
+ brw_F16TO32(p, dst, src);
+}
+
+void
fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
@@ -1082,7 +1110,12 @@ fs_generator::generate_code(exec_list *instructions)
case BRW_OPCODE_SHL:
brw_SHL(p, dst, src[0], src[1]);
break;
-
+ case BRW_OPCODE_F32TO16:
+ brw_F32TO16(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_F16TO32:
+ brw_F16TO32(p, dst, src[0]);
+ break;
case BRW_OPCODE_CMP:
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
break;
@@ -1229,6 +1262,10 @@ fs_generator::generate_code(exec_list *instructions)
generate_set_global_offset(inst, dst, src[0], src[1]);
break;
+ case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+ generate_unpack_half_2x16_split_y(inst, dst, src[0]);
+ break;
+
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5885989..042ccca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -538,7 +538,20 @@ fs_visitor::visit(ir_expression *ir)
BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
this->result, op[0], op[1]);
break;
-
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_pack_half_2x16:
+ assert(!"not reached: should be handled by lower_packing_builtins");
+ break;
+ case ir_unop_unpack_half_2x16_split_x:
+ emit_unpack_half_2x16_split_x(this->result, op[0]);
+ break;
+ case ir_unop_unpack_half_2x16_split_y:
+ emit_unpack_half_2x16_split_y(this->result, op[0]);
+ break;
case ir_binop_pow:
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
@@ -566,7 +579,9 @@ fs_visitor::visit(ir_expression *ir)
else
inst = emit(SHR(this->result, op[0], op[1]));
break;
-
+ case ir_binop_pack_half_2x16_split:
+ emit_pack_half_2x16_split(this->result, op[0], op[1]);
+ break;
case ir_binop_ubo_load:
/* This IR node takes a constant uniform block and a constant or
* variable byte offset within the block and loads a vector from that.
@@ -2261,6 +2276,69 @@ fs_visitor::emit_fb_writes()
}
void
+fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
+{
+ if (intel->gen < 7)
+ assert(!"packHalf2x16 should be lowered");
+
+ /* uint dst; ends up as f32to16(x) | (f32to16(y) << 16) */
+ assert(dst.type == BRW_REGISTER_TYPE_UD);
+
+ /* float x; */
+ assert(x.type == BRW_REGISTER_TYPE_F);
+
+ /* float y; */
+ assert(y.type == BRW_REGISTER_TYPE_F);
+
+ /* uint tmp; temporary that holds the converted y half */
+ fs_reg tmp(this, glsl_type::uint_type);
+
+ /* dst = f32to16(x); NOTE(review): assumes dst bits 16-31 become 0 -- confirm */
+ emit(BRW_OPCODE_F32TO16, dst, x);
+
+ /* tmp = f32to16(y); */
+ emit(BRW_OPCODE_F32TO16, tmp, y);
+
+ /* tmp <<= 16; moves the converted y into the upper 16 bits */
+ emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
+
+ /* dst |= tmp; */
+ emit(BRW_OPCODE_OR, dst, dst, tmp);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
+{
+ if (intel->gen < 7)
+ assert(!"unpackHalf2x16 should be lowered");
+
+ /* float dst; receives the unpacked X component */
+ assert(dst.type == BRW_REGISTER_TYPE_F);
+
+ /* uint src0; the packed value being unpacked */
+ assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+ /* dst = f16to32(src0); src0 is passed as-is, presumably its low 16 bits are read -- confirm */
+ emit(BRW_OPCODE_F16TO32, dst, src0);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0)
+{
+ if (intel->gen < 7)
+ assert(!"unpackHalf2x16 should be lowered");
+
+ assert(dst.type == BRW_REGISTER_TYPE_F);
+ assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+ /* The Y channel (upper 16 bits of src0) needs clever region addressing,
+ * which can only be chosen at the fs_generator stage. So we postpone with a
+ * special FS opcode; see fs_generator::generate_unpack_half_2x16_split_y().
+ */
+ emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0);
+}
+
+void
fs_visitor::resolve_ud_negate(fs_reg *reg)
{
if (reg->type != BRW_REGISTER_TYPE_UD ||
--
1.8.1.1
More information about the mesa-dev
mailing list