[Mesa-dev] [PATCH 3/3] i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes.
Ian Romanick
idr at freedesktop.org
Thu Feb 6 16:14:41 PST 2014
On 01/29/2014 02:36 PM, Kenneth Graunke wrote:
> I'd neglected to port these to Broadwell. Most of this code is copy
> and pasted from Gen7, but instead of using F32TO16/F16TO32, we just
> use MOV with HF register types.
>
> Fixes fs-packHalf2x16 and fs-unpackHalf2x16 tests (both the ARB
> extension and ES 3.0 variants).
In light of the redaction on patch 2, is this actually correct?
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
One nearly infinitesimal nit below. Assuming the commit message is
correct, this patch is
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_fs.h | 7 +++
> src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 76 ++++++++++++++++++++++++-
> 2 files changed, 81 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 9c5c13a..5c7f2ce 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -673,6 +673,13 @@ private:
> void generate_set_simd4x2_offset(fs_inst *ir,
> struct brw_reg dst,
> struct brw_reg offset);
> + void generate_pack_half_2x16_split(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg x,
> + struct brw_reg y);
> + void generate_unpack_half_2x16_split(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src);
> void generate_discard_jump(fs_inst *ir);
>
> void patch_discard_jumps_to_fb_writes();
> diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
> index 6793ce0..43eaa35 100644
> --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
> @@ -582,6 +582,78 @@ gen8_fs_generator::generate_set_simd4x2_offset(fs_inst *ir,
> MOV_RAW(retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
> }
>
> +/**
> + * Change the register's data type from UD to HF, doubling the strides in order
> + * to compensate for halving the data type width.
> + */
> +static struct brw_reg
> +ud_reg_to_hf(struct brw_reg r)
> +{
> + assert(r.type == BRW_REGISTER_TYPE_UD);
> + r.type = BRW_REGISTER_TYPE_HF;
> +
> + /* The BRW_*_STRIDE enums are defined so that incrementing the field
> + * doubles the real stride.
> + */
> + if (r.hstride != 0)
> + ++r.hstride;
> + if (r.vstride != 0)
> + ++r.vstride;
> +
> + return r;
> +}
> +
> +void
> +gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg x,
> + struct brw_reg y)
> +{
> + assert(dst.type == BRW_REGISTER_TYPE_UD);
> + assert(x.type == BRW_REGISTER_TYPE_F);
> + assert(y.type == BRW_REGISTER_TYPE_F);
> +
> + struct brw_reg dst_hf = ud_reg_to_hf(dst);
> +
> + /* Give each 32-bit channel of dst the form below , where "." means
^ extra space (before the comma in "below , where" on the line above)
> + * unchanged.
> + * 0x....hhhh
> + */
> + MOV(dst_hf, y);
> +
> + /* Now the form:
> + * 0xhhhh0000
> + */
> + SHL(dst, dst, brw_imm_ud(16u));
> +
> + /* And, finally the form of packHalf2x16's output:
> + * 0xhhhhllll
> + */
> + MOV(dst_hf, x);
> +}
> +
> +void
> +gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src)
> +{
> + assert(dst.type == BRW_REGISTER_TYPE_F);
> + assert(src.type == BRW_REGISTER_TYPE_UD);
> +
> + struct brw_reg src_hf = ud_reg_to_hf(src);
> +
> + /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
> + * For the Y case, we wish to access only the upper word; therefore
> + * a 16-bit subregister offset is needed.
> + */
> + assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
> + inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
> + if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
> + src_hf.subnr += 2;
> +
> + MOV(dst, src_hf);
> +}
> +
> void
> gen8_fs_generator::generate_code(exec_list *instructions)
> {
> @@ -965,12 +1037,12 @@ gen8_fs_generator::generate_code(exec_list *instructions)
> break;
>
> case FS_OPCODE_PACK_HALF_2x16_SPLIT:
> - assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
> + generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
> break;
>
> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
> - assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
> + generate_unpack_half_2x16_split(ir, dst, src[0]);
> break;
>
> case FS_OPCODE_PLACEHOLDER_HALT:
More information about the mesa-dev mailing list