[Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations

Thu Jan 10 10:45:38 PST 2013

On 01/10/2013 12:10 AM, Chad Versace wrote:
> Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
> ---
>   src/mesa/drivers/dri/i965/brw_defines.h            |  1 +
>   src/mesa/drivers/dri/i965/brw_fs.h                 |  7 ++
>   .../dri/i965/brw_fs_channel_expressions.cpp        | 29 +++++++-
>   src/mesa/drivers/dri/i965/brw_fs_emit.cpp          | 39 ++++++++++-
>   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 78 +++++++++++++++++++++-
>   5 files changed, 149 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 22d3e98..1c43d68 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -713,6 +713,7 @@ enum opcode {
>      FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
>      FS_OPCODE_DISCARD_JUMP,
>      FS_OPCODE_SET_GLOBAL_OFFSET,
> +   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
>
>      VS_OPCODE_URB_WRITE,
>      VS_OPCODE_SCRATCH_READ,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index bcf38f3..59aa28d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -355,6 +355,10 @@ public:
>      fs_reg fix_math_operand(fs_reg src);
>      fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
>      fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
> +   void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
> +   void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
> +   void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
> +
>      void emit_minmax(uint32_t conditionalmod, fs_reg dst,
>                       fs_reg src0, fs_reg src1);
>      bool try_emit_saturate(ir_expression *ir);
> @@ -541,6 +545,9 @@ private:
>                                      struct brw_reg src,
>                                      struct brw_reg offset);
>      void generate_discard_jump(fs_inst *inst);
> +   void generate_unpack_half_2x16_split_y(fs_inst *inst,
> +                                          struct brw_reg dst,
> +                                          struct brw_reg src);
>
>      void patch_discard_jumps_to_fb_writes();
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> index 58521ee..7081511 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> @@ -76,8 +76,21 @@ channel_expressions_predicate(ir_instruction *ir)
>         return false;
>
>      for (i = 0; i < expr->get_num_operands(); i++) {
> -      if (expr->operands[i]->type->is_vector())
> -	 return true;
> +      if (expr->operands[i]->type->is_vector()) {
> +         switch (expr->operation) {
> +         case ir_binop_pack_half_2x16_split:
> +         case ir_unop_pack_half_2x16:
> +         case ir_unop_unpack_half_2x16:
> +         case ir_unop_unpack_half_2x16_split_x:
> +         case ir_unop_unpack_half_2x16_split_y:
> +            assert(!"WTF");

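Classy. :)  Maybe use a descriptive message like the one below -- but note
that the '!' has to go outside the string literal, otherwise the assert can
never fire (the instance below has the same typo):

     assert(!"not reached: expression operates on scalars only");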

> +            break;
> +         default:
> +            break;
> +         }
> +
> +         return true;
> +      }
>      }
>
>      return false;
> @@ -342,9 +355,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
>         assert(!"not yet supported");
>         break;
>
> +   case ir_unop_pack_snorm_2x16:
> +   case ir_unop_pack_unorm_2x16:
> +   case ir_unop_pack_half_2x16:
> +   case ir_unop_unpack_snorm_2x16:
> +   case ir_unop_unpack_unorm_2x16:
> +   case ir_unop_unpack_half_2x16:
>      case ir_quadop_vector:
>         assert(!"should have been lowered");
>         break;
> +
> +   case ir_unop_unpack_half_2x16_split_x:
> +   case ir_unop_unpack_half_2x16_split_y:
> +   case ir_binop_pack_half_2x16_split:
> +      assert("!not reached: expression operates on scalars only");
> +      break;
>      }
>
>      ir->remove();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 63f09fe..46e2409 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -920,6 +920,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
>   }
>
>   void
> +fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
> +                                                struct brw_reg dst,
> +                                                struct brw_reg src)
> +{
> +   assert(intel->gen >= 7);
> +
> +   /* src has the form of unpackHalf2x16's input:
> +    *
> +    *         w     z     y          x
> +    *   |undef|undef|undef|0xhhhhllll|
> +    *
> +    * We wish to access only the "hhhh" bits of the source register, and hence
> +    * must access it with a 16 bit subregister offset.  To do so, we must
> +    * halve the size of the source data type from UD to UW and compensate by
> +    * doubling the stride.
> +    */
> +   assert(src.type == BRW_REGISTER_TYPE_UD);
> +   src.type = BRW_REGISTER_TYPE_UW;
> +   if (src.vstride > 0)
> +      ++src.vstride;
> +   if (src.hstride > 0)
> +      ++src.hstride;
> +   src.subnr += 2;
> +
> +   brw_F16TO32(p, dst, src);
> +}
> +
> +void
>   fs_generator::generate_code(exec_list *instructions)
>   {
>      int last_native_insn_offset = p->next_insn_offset;
> @@ -1079,7 +1107,12 @@ fs_generator::generate_code(exec_list *instructions)
>         case BRW_OPCODE_SHL:
>   	 brw_SHL(p, dst, src[0], src[1]);
>   	 break;
> -
> +      case BRW_OPCODE_F32TO16:
> +         brw_F32TO16(p, dst, src[0]);
> +         break;
> +      case BRW_OPCODE_F16TO32:
> +         brw_F16TO32(p, dst, src[0]);
> +         break;
>         case BRW_OPCODE_CMP:
>   	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
>   	 break;
> @@ -1226,6 +1259,10 @@ fs_generator::generate_code(exec_list *instructions)
>            generate_set_global_offset(inst, dst, src[0], src[1]);
>            break;
>
> +      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
> +         generate_unpack_half_2x16_split_y(inst, dst, src[0]);
> +         break;
> +
>         default:
>   	 if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
>   	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index e70d6bf..563d1d5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -536,7 +536,20 @@ fs_visitor::visit(ir_expression *ir)
>                     BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
>                     this->result, op[0], op[1]);
>         break;
> -
> +   case ir_unop_pack_snorm_2x16:
> +   case ir_unop_pack_unorm_2x16:
> +   case ir_unop_unpack_snorm_2x16:
> +   case ir_unop_unpack_unorm_2x16:
> +   case ir_unop_unpack_half_2x16:
> +   case ir_unop_pack_half_2x16:
> +      assert(!"not reached: should be handled by lower_packing_builtins");
> +      break;
> +   case ir_unop_unpack_half_2x16_split_x:
> +      emit_unpack_half_2x16_split_x(this->result, op[0]);
> +      break;
> +   case ir_unop_unpack_half_2x16_split_y:
> +      emit_unpack_half_2x16_split_y(this->result, op[0]);
> +      break;
>      case ir_binop_pow:
>         emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
>         break;
> @@ -564,7 +577,9 @@ fs_visitor::visit(ir_expression *ir)
>         else
>   	 inst = emit(SHR(this->result, op[0], op[1]));
>         break;
> -
> +   case ir_binop_pack_half_2x16_split:
> +      emit_pack_half_2x16_split(this->result, op[0], op[1]);
> +      break;
>      case ir_binop_ubo_load:
>         /* This IR node takes a constant uniform block and a constant or
>          * variable byte offset within the block and loads a vector from that.
> @@ -2259,6 +2274,65 @@ fs_visitor::emit_fb_writes()
>   }
>
>   void
> +fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
> +{
> +   if (intel->gen < 7)
> +      assert(!"packHalf2x16 should be handled by lower_packing_builtins");
> +
> +   /* uint dst; */
> +   assert(dst.type == BRW_REGISTER_TYPE_UD);
> +
> +   /* float x; */
> +   assert(x.type == BRW_REGISTER_TYPE_F);
> +
> +   /* float y; */
> +   assert(y.type == BRW_REGISTER_TYPE_F);
> +
> +   /* uint tmp; */
> +   fs_reg tmp(this, glsl_type::uint_type);
> +
> +   /* dst = f32to16(x); */
> +   emit(BRW_OPCODE_F32TO16, dst, x);
> +
> +   /* tmp = f32to16(y); */
> +   emit(BRW_OPCODE_F32TO16, tmp, y);
> +
> +   /* tmp <<= 16; */
> +   emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
> +
> +   /* dst |= tmp; */
> +   emit(BRW_OPCODE_OR, dst, dst, tmp);
> +}
> +
> +void
> +fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
> +{
> +   if (intel->gen < 7)
> +      assert(!"unpackHalf2x16 should be lowered");
> +
> +   /* float dst; */
> +   assert(dst.type == BRW_REGISTER_TYPE_F);
> +
> +   /* uint src0; */
> +   assert(src0.type == BRW_REGISTER_TYPE_UD);
> +
> +   /* dst = f16to32(src0); */
> +   emit(BRW_OPCODE_F16TO32, dst, src0);
> +}
> +
> +void
> +fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0)
> +{
> +   if (intel->gen < 7)
> +      assert(!"unpackHalf2x16 should be lowered");
> +
> +   assert(dst.type == BRW_REGISTER_TYPE_F);
> +   assert(src0.type == BRW_REGISTER_TYPE_UD);
> +
> +   emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0);
> +}
> +
> +void
>   fs_visitor::resolve_ud_negate(fs_reg *reg)
>   {
>      if (reg->type != BRW_REGISTER_TYPE_UD ||
>