[Mesa-dev] [PATCH (gles3) 20/20] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (v2)

Chad Versace chad.versace at linux.intel.com
Mon Jan 21 00:49:32 PST 2013


v2: Remove lewd comment [for idr].

Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h            |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h                 |  7 ++
 .../dri/i965/brw_fs_channel_expressions.cpp        | 12 ++++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          | 39 +++++++++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 82 +++++++++++++++++++++-
 5 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e2f1e65..e3d297c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -726,6 +726,7 @@ enum opcode {
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
    FS_OPCODE_SET_GLOBAL_OFFSET,
+   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b47b0d0..49e2ed0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,10 @@ public:
    fs_reg fix_math_operand(fs_reg src);
    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+   void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
+   void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
+   void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
+
    void emit_minmax(uint32_t conditionalmod, fs_reg dst,
                     fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
@@ -541,6 +545,9 @@ private:
                                    struct brw_reg src,
                                    struct brw_reg offset);
    void generate_discard_jump(fs_inst *inst);
+   void generate_unpack_half_2x16_split_y(fs_inst *inst,
+                                          struct brw_reg dst,
+                                          struct brw_reg src);
 
    void patch_discard_jumps_to_fb_writes();
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 58521ee..e19da51 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -342,9 +342,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
       assert(!"not yet supported");
       break;
 
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
    case ir_quadop_vector:
       assert(!"should have been lowered");
       break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+   case ir_binop_pack_half_2x16_split:
+      assert(!"not reached: expression operates on scalars only");
+      break;
    }
 
    ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 324e665..0ff296c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -923,6 +923,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
 }
 
 void
+fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
+                                                struct brw_reg dst,
+                                                struct brw_reg src)
+{
+   assert(intel->gen >= 7);
+
+   /* src has the form of unpackHalf2x16's input:
+    *
+    *         w     z     y          x
+    *   |undef|undef|undef|0xhhhhllll|
+    *
+    * Only the "hhhh" bits are wanted: re-type src from UD to UW and read it
+    * at a 16 bit subregister offset (subnr += 2, presumably in bytes).  The
+    * halved element size is offset by doubling each non-zero stride; the ++
+    * assumes the stride fields are log2-style encodings (TODO confirm).
+    */
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+   src.type = BRW_REGISTER_TYPE_UW;
+   if (src.vstride > 0)
+      ++src.vstride;
+   if (src.hstride > 0)
+      ++src.hstride;
+   src.subnr += 2;
+
+   brw_F16TO32(p, dst, src);
+}
+
+void
 fs_generator::generate_code(exec_list *instructions)
 {
    int last_native_insn_offset = p->next_insn_offset;
@@ -1082,7 +1110,12 @@ fs_generator::generate_code(exec_list *instructions)
       case BRW_OPCODE_SHL:
 	 brw_SHL(p, dst, src[0], src[1]);
 	 break;
-
+      case BRW_OPCODE_F32TO16:
+         brw_F32TO16(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_F16TO32:
+         brw_F16TO32(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CMP:
 	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
 	 break;
@@ -1229,6 +1262,10 @@ fs_generator::generate_code(exec_list *instructions)
          generate_set_global_offset(inst, dst, src[0], src[1]);
          break;
 
+      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+         generate_unpack_half_2x16_split_y(inst, dst, src[0]);
+         break;
+
       default:
 	 if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
 	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5885989..042ccca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -538,7 +538,20 @@ fs_visitor::visit(ir_expression *ir)
                   BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
                   this->result, op[0], op[1]);
       break;
-
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+   case ir_unop_pack_half_2x16:
+      assert(!"not reached: should be handled by lower_packing_builtins");
+      break;
+   case ir_unop_unpack_half_2x16_split_x:
+      emit_unpack_half_2x16_split_x(this->result, op[0]);
+      break;
+   case ir_unop_unpack_half_2x16_split_y:
+      emit_unpack_half_2x16_split_y(this->result, op[0]);
+      break;
    case ir_binop_pow:
       emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
       break;
@@ -566,7 +579,9 @@ fs_visitor::visit(ir_expression *ir)
       else
 	 inst = emit(SHR(this->result, op[0], op[1]));
       break;
-
+   case ir_binop_pack_half_2x16_split:
+      emit_pack_half_2x16_split(this->result, op[0], op[1]);
+      break;
    case ir_binop_ubo_load:
       /* This IR node takes a constant uniform block and a constant or
        * variable byte offset within the block and loads a vector from that.
@@ -2261,6 +2276,69 @@ fs_visitor::emit_fb_writes()
 }
 
 void
+fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
+{
+   if (intel->gen < 7)
+      assert(!"packHalf2x16 should be lowered");
+
+   /* dst receives the packed result and must be a UD register. */
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+
+   /* x must be a float; it is converted straight into dst. */
+   assert(x.type == BRW_REGISTER_TYPE_F);
+
+   /* y must be a float; it is converted into tmp, then shifted up. */
+   assert(y.type == BRW_REGISTER_TYPE_F);
+
+   /* uint tmp: scratch register holding the converted y. */
+   fs_reg tmp(this, glsl_type::uint_type);
+
+   /* dst = f32to16(x); */
+   emit(BRW_OPCODE_F32TO16, dst, x);
+
+   /* tmp = f32to16(y); */
+   emit(BRW_OPCODE_F32TO16, tmp, y);
+
+   /* tmp <<= 16;  moves the converted y into the upper word */
+   emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
+
+   /* dst |= tmp;  assumes dst's upper word is still zero -- TODO confirm */
+   emit(BRW_OPCODE_OR, dst, dst, tmp);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
+{
+   if (intel->gen < 7)
+      assert(!"unpackHalf2x16 should be lowered");
+
+   /* dst is the float result. */
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+
+   /* src0 is the packed uint operand. */
+   assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+   /* dst = f16to32(src0);  presumably converts src0's low word -- confirm */
+   emit(BRW_OPCODE_F16TO32, dst, src0);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0)
+{
+   if (intel->gen < 7)
+      assert(!"unpackHalf2x16 should be lowered");
+
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+   /* The Y half sits in the upper word of src0; reading it needs region
+    * addressing that can only be chosen at the fs_generator stage, so a
+    * dedicated FS opcode is emitted here and expanded by the generator.
+    */
+   emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0);
+}
+
+void
 fs_visitor::resolve_ud_negate(fs_reg *reg)
 {
    if (reg->type != BRW_REGISTER_TYPE_UD ||
-- 
1.8.1.1



More information about the mesa-dev mailing list