[Mesa-dev] [PATCH v2 017/103] i965/vec4: add VEC4_OPCODE_PICK_{LOW, HIGH}_32BIT opcodes
Ian Romanick
idr at freedesktop.org
Wed Oct 19 00:36:14 UTC 2016
This patch is
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
We may be able to eliminate some of this after I do int64 support. It
might be cleaner to do unpackInt2x32(doubleBitsToInt64(x)) at a higher
level of the compiler instead.
On 10/11/2016 02:01 AM, Iago Toral Quiroga wrote:
> These opcodes pick the low/high 32 bits of each 64-bit data element
> using Align1 mode. We will use them, for example, to implement things
> like unpackDouble2x32.
>
> We use Align1 mode because in order to implement this in Align16 mode
> we would need to use 32-bit logical swizzles (XZ for low, YW for high),
> but the IR works in terms of 64-bit logical swizzles for DF operands
> all the way up to codegen.
>
> v2:
> - use suboffset() instead of get_element_ud()
> - no need to set the width on the dst
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
> src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++++
> src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 25 ++++++++++++++++++++++++
> 4 files changed, 35 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 79b96a4..8ffb50c 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1100,6 +1100,8 @@ enum opcode {
> VEC4_OPCODE_UNPACK_UNIFORM,
> VEC4_OPCODE_DOUBLE_TO_FLOAT,
> VEC4_OPCODE_FLOAT_TO_DOUBLE,
> + VEC4_OPCODE_PICK_LOW_32BIT,
> + VEC4_OPCODE_PICK_HIGH_32BIT,
>
> FS_OPCODE_DDX_COARSE,
> FS_OPCODE_DDX_FINE,
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index b063f77..b2f3a56 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -321,6 +321,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
> return "double_to_float";
> case VEC4_OPCODE_FLOAT_TO_DOUBLE:
> return "float_to_double";
> + case VEC4_OPCODE_PICK_LOW_32BIT:
> + return "pick_low_32bit";
> + case VEC4_OPCODE_PICK_HIGH_32BIT:
> + return "pick_high_32bit";
>
> case FS_OPCODE_DDX_COARSE:
> return "ddx_coarse";
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 40f8702..4fd04f1 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -255,6 +255,8 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
> case SHADER_OPCODE_GEN4_SCRATCH_READ:
> case VEC4_OPCODE_DOUBLE_TO_FLOAT:
> case VEC4_OPCODE_FLOAT_TO_DOUBLE:
> + case VEC4_OPCODE_PICK_LOW_32BIT:
> + case VEC4_OPCODE_PICK_HIGH_32BIT:
> case VS_OPCODE_PULL_CONSTANT_LOAD:
> case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
> @@ -510,6 +512,8 @@ vec4_visitor::opt_reduce_swizzle()
>
> case VEC4_OPCODE_FLOAT_TO_DOUBLE:
> case VEC4_OPCODE_DOUBLE_TO_FLOAT:
> + case VEC4_OPCODE_PICK_LOW_32BIT:
> + case VEC4_OPCODE_PICK_HIGH_32BIT:
> swizzle = brw_swizzle_for_size(4);
> break;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index 6f4c438..b8778c4 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1940,6 +1940,31 @@ generate_code(struct brw_codegen *p,
> break;
> }
>
> + case VEC4_OPCODE_PICK_LOW_32BIT:
> + case VEC4_OPCODE_PICK_HIGH_32BIT: {
> + /* Stores the low/high 32-bit of each 64-bit element in src[0] into
> + * dst using ALIGN1 mode and a <8,4,2>:UD region on the source.
> + */
> + assert(type_sz(src[0].type) == 8);
> + assert(type_sz(dst.type) == 4);
> +
> + brw_set_default_access_mode(p, BRW_ALIGN_1);
> +
> + dst = retype(dst, BRW_REGISTER_TYPE_UD);
> + dst.hstride = BRW_HORIZONTAL_STRIDE_1;
> +
> + src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
> + if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
> + src[0] = suboffset(src[0], 1);
> + src[0].vstride = BRW_VERTICAL_STRIDE_8;
> + src[0].width = BRW_WIDTH_4;
> + src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
> + brw_MOV(p, dst, src[0]);
> +
> + brw_set_default_access_mode(p, BRW_ALIGN_16);
> + break;
> + }
> +
> case VEC4_OPCODE_PACK_BYTES: {
> /* Is effectively:
> *
>
More information about the mesa-dev mailing list