[Mesa-dev] [PATCH v2 018/103] i965/vec4: add VEC4_OPCODE_SET_{LOW, HIGH}_32BIT opcodes
Ian Romanick
idr at freedesktop.org
Wed Oct 19 00:36:50 UTC 2016
This patch is
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
On 10/11/2016 02:01 AM, Iago Toral Quiroga wrote:
> These opcodes set the low/high 32 bits of each 64-bit data element
> using Align1 mode. We will use this to implement packDouble2x32.
>
> We use Align1 mode because in order to implement this in Align16 mode
> we would need to use 32-bit logical swizzles (XZ for low, YW for high),
> but the IR works in terms of 64-bit logical swizzles for DF operands
> all the way up to codegen.
>
> v2:
> - use suboffset() instead of get_element_ud()
> - no need to set the width on the dst
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
> src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++++
> src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 25 ++++++++++++++++++++++++
> 4 files changed, 35 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8ffb50c..35d638c 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1102,6 +1102,8 @@ enum opcode {
> VEC4_OPCODE_FLOAT_TO_DOUBLE,
> VEC4_OPCODE_PICK_LOW_32BIT,
> VEC4_OPCODE_PICK_HIGH_32BIT,
> + VEC4_OPCODE_SET_LOW_32BIT,
> + VEC4_OPCODE_SET_HIGH_32BIT,
>
> FS_OPCODE_DDX_COARSE,
> FS_OPCODE_DDX_FINE,
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index b2f3a56..153bd43 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -325,6 +325,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
> return "pick_low_32bit";
> case VEC4_OPCODE_PICK_HIGH_32BIT:
> return "pick_high_32bit";
> + case VEC4_OPCODE_SET_LOW_32BIT:
> + return "set_low_32bit";
> + case VEC4_OPCODE_SET_HIGH_32BIT:
> + return "set_high_32bit";
>
> case FS_OPCODE_DDX_COARSE:
> return "ddx_coarse";
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 4fd04f1..06fa38f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -257,6 +257,8 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
> case VEC4_OPCODE_FLOAT_TO_DOUBLE:
> case VEC4_OPCODE_PICK_LOW_32BIT:
> case VEC4_OPCODE_PICK_HIGH_32BIT:
> + case VEC4_OPCODE_SET_LOW_32BIT:
> + case VEC4_OPCODE_SET_HIGH_32BIT:
> case VS_OPCODE_PULL_CONSTANT_LOAD:
> case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
> @@ -514,6 +516,8 @@ vec4_visitor::opt_reduce_swizzle()
> case VEC4_OPCODE_DOUBLE_TO_FLOAT:
> case VEC4_OPCODE_PICK_LOW_32BIT:
> case VEC4_OPCODE_PICK_HIGH_32BIT:
> + case VEC4_OPCODE_SET_LOW_32BIT:
> + case VEC4_OPCODE_SET_HIGH_32BIT:
> swizzle = brw_swizzle_for_size(4);
> break;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index b8778c4..120797b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1965,6 +1965,31 @@ generate_code(struct brw_codegen *p,
> break;
> }
>
> + case VEC4_OPCODE_SET_LOW_32BIT:
> + case VEC4_OPCODE_SET_HIGH_32BIT: {
> + /* Reads consecutive 32-bit elements from src[0] and writes
> + * them to the low/high 32-bit of each 64-bit element in dst.
> + */
> + assert(type_sz(src[0].type) == 4);
> + assert(type_sz(dst.type) == 8);
> +
> + brw_set_default_access_mode(p, BRW_ALIGN_1);
> +
> + dst = retype(dst, BRW_REGISTER_TYPE_UD);
> + if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
> + dst = suboffset(dst, 1);
> + dst.hstride = BRW_HORIZONTAL_STRIDE_2;
> +
> + src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
> + src[0].vstride = BRW_VERTICAL_STRIDE_4;
> + src[0].width = BRW_WIDTH_4;
> + src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
> + brw_MOV(p, dst, src[0]);
> +
> + brw_set_default_access_mode(p, BRW_ALIGN_16);
> + break;
> + }
> +
> case VEC4_OPCODE_PACK_BYTES: {
> /* Is effectively:
> *
>
More information about the mesa-dev mailing list