[Mesa-dev] [PATCH v2 018/103] i965/vec4: add VEC4_OPCODE_SET_{LOW, HIGH}_32BIT opcodes

Ian Romanick idr at freedesktop.org
Wed Oct 19 00:36:50 UTC 2016


This patch is

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

On 10/11/2016 02:01 AM, Iago Toral Quiroga wrote:
> These opcodes will set the low/high 32 bits of each 64-bit data element
> using Align1 mode. We will use this to implement packDouble2x32.
> 
> We use Align1 mode because in order to implement this in Align16 mode
> we would need to use 32-bit logical swizzles (XZ for low, YW for high),
> but the IR works in terms of 64-bit logical swizzles for DF operands
> all the way up to codegen.
> 
> v2:
>  - use suboffset() instead of get_element_ud()
>  - no need to set the width on the dst
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h          |  2 ++
>  src/mesa/drivers/dri/i965/brw_shader.cpp         |  4 ++++
>  src/mesa/drivers/dri/i965/brw_vec4.cpp           |  4 ++++
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 25 ++++++++++++++++++++++++
>  4 files changed, 35 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8ffb50c..35d638c 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1102,6 +1102,8 @@ enum opcode {
>     VEC4_OPCODE_FLOAT_TO_DOUBLE,
>     VEC4_OPCODE_PICK_LOW_32BIT,
>     VEC4_OPCODE_PICK_HIGH_32BIT,
> +   VEC4_OPCODE_SET_LOW_32BIT,
> +   VEC4_OPCODE_SET_HIGH_32BIT,
>  
>     FS_OPCODE_DDX_COARSE,
>     FS_OPCODE_DDX_FINE,
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index b2f3a56..153bd43 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -325,6 +325,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
>        return "pick_low_32bit";
>     case VEC4_OPCODE_PICK_HIGH_32BIT:
>        return "pick_high_32bit";
> +   case VEC4_OPCODE_SET_LOW_32BIT:
> +      return "set_low_32bit";
> +   case VEC4_OPCODE_SET_HIGH_32BIT:
> +      return "set_high_32bit";
>  
>     case FS_OPCODE_DDX_COARSE:
>        return "ddx_coarse";
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 4fd04f1..06fa38f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -257,6 +257,8 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
>     case VEC4_OPCODE_FLOAT_TO_DOUBLE:
>     case VEC4_OPCODE_PICK_LOW_32BIT:
>     case VEC4_OPCODE_PICK_HIGH_32BIT:
> +   case VEC4_OPCODE_SET_LOW_32BIT:
> +   case VEC4_OPCODE_SET_HIGH_32BIT:
>     case VS_OPCODE_PULL_CONSTANT_LOAD:
>     case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
>     case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
> @@ -514,6 +516,8 @@ vec4_visitor::opt_reduce_swizzle()
>        case VEC4_OPCODE_DOUBLE_TO_FLOAT:
>        case VEC4_OPCODE_PICK_LOW_32BIT:
>        case VEC4_OPCODE_PICK_HIGH_32BIT:
> +      case VEC4_OPCODE_SET_LOW_32BIT:
> +      case VEC4_OPCODE_SET_HIGH_32BIT:
>           swizzle = brw_swizzle_for_size(4);
>           break;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index b8778c4..120797b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1965,6 +1965,31 @@ generate_code(struct brw_codegen *p,
>           break;
>        }
>  
> +      case VEC4_OPCODE_SET_LOW_32BIT:
> +      case VEC4_OPCODE_SET_HIGH_32BIT: {
> +         /* Reads consecutive 32-bit elements from src[0] and writes
> +          * them to the low/high 32-bit of each 64-bit element in dst.
> +          */
> +         assert(type_sz(src[0].type) == 4);
> +         assert(type_sz(dst.type) == 8);
> +
> +         brw_set_default_access_mode(p, BRW_ALIGN_1);
> +
> +         dst = retype(dst, BRW_REGISTER_TYPE_UD);
> +         if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
> +            dst = suboffset(dst, 1);
> +         dst.hstride = BRW_HORIZONTAL_STRIDE_2;
> +
> +         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
> +         src[0].vstride = BRW_VERTICAL_STRIDE_4;
> +         src[0].width = BRW_WIDTH_4;
> +         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
> +         brw_MOV(p, dst, src[0]);
> +
> +         brw_set_default_access_mode(p, BRW_ALIGN_16);
> +         break;
> +      }
> +
>        case VEC4_OPCODE_PACK_BYTES: {
>           /* Is effectively:
>            *
> 



More information about the mesa-dev mailing list