[Mesa-dev] [PATCH (gles3) 20/20] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (v2)

Mon Jan 21 13:18:39 PST 2013

Chad Versace <chad.versace at linux.intel.com> writes:
>     ir->remove();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 324e665..0ff296c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -923,6 +923,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
>  }
>  
>  void
> +fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
> +                                                struct brw_reg dst,
> +                                                struct brw_reg src)
> +{
> +   assert(intel->gen >= 7);
> +
> +   /* src has the form of unpackHalf2x16's input:
> +    *
> +    *         w     z     y          x
> +    *   |undef|undef|undef|0xhhhhllll|

I don't understand what "w z y x" mean here.  I thought src was just a
scalar uint (one uint per pixel).

> +   assert(src.type == BRW_REGISTER_TYPE_UD);
> +   src.type = BRW_REGISTER_TYPE_UW;
> +   if (src.vstride > 0)
> +      ++src.vstride;
> +   if (src.hstride > 0)
> +      ++src.hstride;

This ++hstride/++vstride looks totally wrong to me.  We use a vstride of
8 in 16-wide for temporaries -- doesn't this totally trash that?

I think what you want here is an unconditional *= 2 for both values.

> +   src.subnr += 2;
> +
> +   brw_F16TO32(p, dst, src);

> @@ -2261,6 +2276,69 @@ fs_visitor::emit_fb_writes()
>  }
>  
>  void
> +fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
> +{
> +   if (intel->gen < 7)
> +      assert(!"packHalf2x16 should be lowered");
> +
> +   /* uint dst; */
> +   assert(dst.type == BRW_REGISTER_TYPE_UD);
> +
> +   /* float x; */
> +   assert(x.type == BRW_REGISTER_TYPE_F);
> +
> +   /* float y; */
> +   assert(y.type == BRW_REGISTER_TYPE_F);
> +
> +   /* uint tmp; */
> +   fs_reg tmp(this, glsl_type::uint_type);
> +
> +   /* dst = f32to16(x); */
> +   emit(BRW_OPCODE_F32TO16, dst, x);
> +
> +   /* tmp = f32to16(y); */
> +   emit(BRW_OPCODE_F32TO16, tmp, y);
> +
> +   /* tmp <<= 16; */
> +   emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
> +
> +   /* dst |= tmp; */
> +   emit(BRW_OPCODE_OR, dst, dst, tmp);
> +}

These comments before every op aren't helping me, since they just say
the same as what the code below says.

Actually, I think this code can bet simpler, given:

"The destination must be DWord aligned, the result is stored in the
lower Word for each channel, data in the upper Word of the destination
is untouched"

So:

   emit(BRW_OPCODE_F32TO16, dst, y);
   emit(BRW_OPCODE_SHL, dst, dst, fs_reg(16u));
   emit(BRW_OPCODE_F32TO16, dst, x);

> +void
> +fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
> +{
> +   if (intel->gen < 7)
> +      assert(!"unpackHalf2x16 should be lowered");
> +
> +   /* float dst; */
> +   assert(dst.type == BRW_REGISTER_TYPE_F);
> +
> +   /* uint src0; */
> +   assert(src0.type == BRW_REGISTER_TYPE_UD);
> +
> +   /* dst = f16to32(src0); */
> +   emit(BRW_OPCODE_F16TO32, dst, src0);
> +}

Drop these comments.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130121/c85ca55e/attachment.pgp>