[Mesa-dev] [PATCH v2 017/103] i965/vec4: add VEC4_OPCODE_PICK_{LOW, HIGH}_32BIT opcodes

Ian Romanick idr at freedesktop.org
Wed Oct 19 00:36:14 UTC 2016


This patch is

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

We may be able to eliminate some of this after I do int64 support.  It
might be cleaner to do unpackInt2x32(doubleBitsToInt64(x)) at a higher
level of the compiler instead.

On 10/11/2016 02:01 AM, Iago Toral Quiroga wrote:
> These opcodes pick the low/high 32 bits of each 64-bit data element
> using Align1 mode. We will use them, for example, to implement things
> like unpackDouble2x32.
> 
> We use Align1 mode because in order to implement this in Align16 mode
> we would need to use 32-bit logical swizzles (XZ for low, YW for high),
> but the IR works in terms of 64-bit logical swizzles for DF operands
> all the way up to codegen.
> 
> v2:
>  - use suboffset() instead of get_element_ud()
>  - no need to set the width on the dst
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h          |  2 ++
>  src/mesa/drivers/dri/i965/brw_shader.cpp         |  4 ++++
>  src/mesa/drivers/dri/i965/brw_vec4.cpp           |  4 ++++
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 25 ++++++++++++++++++++++++
>  4 files changed, 35 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 79b96a4..8ffb50c 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1100,6 +1100,8 @@ enum opcode {
>     VEC4_OPCODE_UNPACK_UNIFORM,
>     VEC4_OPCODE_DOUBLE_TO_FLOAT,
>     VEC4_OPCODE_FLOAT_TO_DOUBLE,
> +   VEC4_OPCODE_PICK_LOW_32BIT,
> +   VEC4_OPCODE_PICK_HIGH_32BIT,
>  
>     FS_OPCODE_DDX_COARSE,
>     FS_OPCODE_DDX_FINE,
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index b063f77..b2f3a56 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -321,6 +321,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
>        return "double_to_float";
>     case VEC4_OPCODE_FLOAT_TO_DOUBLE:
>        return "float_to_double";
> +   case VEC4_OPCODE_PICK_LOW_32BIT:
> +      return "pick_low_32bit";
> +   case VEC4_OPCODE_PICK_HIGH_32BIT:
> +      return "pick_high_32bit";
>  
>     case FS_OPCODE_DDX_COARSE:
>        return "ddx_coarse";
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 40f8702..4fd04f1 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -255,6 +255,8 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
>     case SHADER_OPCODE_GEN4_SCRATCH_READ:
>     case VEC4_OPCODE_DOUBLE_TO_FLOAT:
>     case VEC4_OPCODE_FLOAT_TO_DOUBLE:
> +   case VEC4_OPCODE_PICK_LOW_32BIT:
> +   case VEC4_OPCODE_PICK_HIGH_32BIT:
>     case VS_OPCODE_PULL_CONSTANT_LOAD:
>     case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
>     case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
> @@ -510,6 +512,8 @@ vec4_visitor::opt_reduce_swizzle()
>  
>        case VEC4_OPCODE_FLOAT_TO_DOUBLE:
>        case VEC4_OPCODE_DOUBLE_TO_FLOAT:
> +      case VEC4_OPCODE_PICK_LOW_32BIT:
> +      case VEC4_OPCODE_PICK_HIGH_32BIT:
>           swizzle = brw_swizzle_for_size(4);
>           break;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index 6f4c438..b8778c4 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1940,6 +1940,31 @@ generate_code(struct brw_codegen *p,
>           break;
>        }
>  
> +      case VEC4_OPCODE_PICK_LOW_32BIT:
> +      case VEC4_OPCODE_PICK_HIGH_32BIT: {
> +         /* Stores the low/high 32-bit of each 64-bit element in src[0] into
> +          * dst using ALIGN1 mode and a <8,4,2>:UD region on the source.
> +          */
> +         assert(type_sz(src[0].type) == 8);
> +         assert(type_sz(dst.type) == 4);
> +
> +         brw_set_default_access_mode(p, BRW_ALIGN_1);
> +
> +         dst = retype(dst, BRW_REGISTER_TYPE_UD);
> +         dst.hstride = BRW_HORIZONTAL_STRIDE_1;
> +
> +         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
> +         if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
> +            src[0] = suboffset(src[0], 1);
> +         src[0].vstride = BRW_VERTICAL_STRIDE_8;
> +         src[0].width = BRW_WIDTH_4;
> +         src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
> +         brw_MOV(p, dst, src[0]);
> +
> +         brw_set_default_access_mode(p, BRW_ALIGN_16);
> +         break;
> +      }
> +
>        case VEC4_OPCODE_PACK_BYTES: {
>           /* Is effectively:
>            *
> 



More information about the mesa-dev mailing list