[Mesa-dev] [PATCH 3/3] i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes.

Ian Romanick idr at freedesktop.org
Thu Feb 6 16:14:41 PST 2014


On 01/29/2014 02:36 PM, Kenneth Graunke wrote:
> I'd neglected to port these to Broadwell.  Most of this code is copy
> and pasted from Gen7, but instead of using F32TO16/F16TO32, we just
> use MOV with HF register types.
> 
> Fixes fs-packHalf2x16 and fs-unpackHalf2x16 tests (both the ARB
> extension and ES 3.0 variants).

In light of the redaction on patch 2, is this actually correct?

> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>

One nearly infinitesimal nit below.  Assuming the commit message is
correct, this patch is

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

> ---
>  src/mesa/drivers/dri/i965/brw_fs.h              |  7 +++
>  src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 76 ++++++++++++++++++++++++-
>  2 files changed, 81 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 9c5c13a..5c7f2ce 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -673,6 +673,13 @@ private:
>     void generate_set_simd4x2_offset(fs_inst *ir,
>                                      struct brw_reg dst,
>                                      struct brw_reg offset);
> +   void generate_pack_half_2x16_split(fs_inst *inst,
> +                                      struct brw_reg dst,
> +                                      struct brw_reg x,
> +                                      struct brw_reg y);
> +   void generate_unpack_half_2x16_split(fs_inst *inst,
> +                                        struct brw_reg dst,
> +                                        struct brw_reg src);
>     void generate_discard_jump(fs_inst *ir);
>  
>     void patch_discard_jumps_to_fb_writes();
> diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
> index 6793ce0..43eaa35 100644
> --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
> @@ -582,6 +582,78 @@ gen8_fs_generator::generate_set_simd4x2_offset(fs_inst *ir,
>     MOV_RAW(retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
>  }
>  
> +/**
> + * Change the register's data type from UD to HF, doubling the strides in order
> + * to compensate for halving the data type width.
> + */
> +static struct brw_reg
> +ud_reg_to_hf(struct brw_reg r)
> +{
> +   assert(r.type == BRW_REGISTER_TYPE_UD);
> +   r.type = BRW_REGISTER_TYPE_HF;
> +
> +   /* The BRW_*_STRIDE enums are defined so that incrementing the field
> +    * doubles the real stride.
> +    */
> +   if (r.hstride != 0)
> +      ++r.hstride;
> +   if (r.vstride != 0)
> +      ++r.vstride;
> +
> +   return r;
> +}
> +
> +void
> +gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
> +                                                 struct brw_reg dst,
> +                                                 struct brw_reg x,
> +                                                 struct brw_reg y)
> +{
> +   assert(dst.type == BRW_REGISTER_TYPE_UD);
> +   assert(x.type == BRW_REGISTER_TYPE_F);
> +   assert(y.type == BRW_REGISTER_TYPE_F);
> +
> +   struct brw_reg dst_hf = ud_reg_to_hf(dst);
> +
> +   /* Give each 32-bit channel of dst the form below , where "." means
                                                       ^ extra space
> +    * unchanged.
> +    *   0x....hhhh
> +    */
> +   MOV(dst_hf, y);
> +
> +   /* Now the form:
> +    *   0xhhhh0000
> +    */
> +   SHL(dst, dst, brw_imm_ud(16u));
> +
> +   /* And, finally the form of packHalf2x16's output:
> +    *   0xhhhhllll
> +    */
> +   MOV(dst_hf, x);
> +}
> +
> +void
> +gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
> +                                                   struct brw_reg dst,
> +                                                   struct brw_reg src)
> +{
> +   assert(dst.type == BRW_REGISTER_TYPE_F);
> +   assert(src.type == BRW_REGISTER_TYPE_UD);
> +
> +   struct brw_reg src_hf = ud_reg_to_hf(src);
> +
> +   /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
> +    * For the Y case, we wish to access only the upper word; therefore
> +    * a 16-bit subregister offset is needed.
> +    */
> +   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
> +          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
> +   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
> +      src_hf.subnr += 2;
> +
> +   MOV(dst, src_hf);
> +}
> +
>  void
>  gen8_fs_generator::generate_code(exec_list *instructions)
>  {
> @@ -965,12 +1037,12 @@ gen8_fs_generator::generate_code(exec_list *instructions)
>           break;
>  
>        case FS_OPCODE_PACK_HALF_2x16_SPLIT:
> -         assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
> +         generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
>           break;
>  
>        case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
>        case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
> -         assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
> +         generate_unpack_half_2x16_split(ir, dst, src[0]);
>           break;
>  
>        case FS_OPCODE_PLACEHOLDER_HALT:



More information about the mesa-dev mailing list