[Mesa-dev] [PATCH 19/95] i965/vec4: add VEC4_OPCODE_SET_{LOW, HIGH}_32BIT opcodes
Iago Toral Quiroga
itoral at igalia.com
Tue Jul 19 10:40:16 UTC 2016
These opcodes set the low/high 32 bits of each 64-bit data element
using Align1 mode. We will use them to implement packDouble2x32.
We can't do this in Align16 mode because the data would need to cross
the vec4 boundary.
---
src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++
src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++++
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 26 ++++++++++++++++++++++++
4 files changed, 36 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f2da29d..ea5c273 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1098,6 +1098,8 @@ enum opcode {
VEC4_OPCODE_FLOAT_TO_DOUBLE,
VEC4_OPCODE_PICK_LOW_32BIT,
VEC4_OPCODE_PICK_HIGH_32BIT,
+ VEC4_OPCODE_SET_LOW_32BIT,
+ VEC4_OPCODE_SET_HIGH_32BIT,
FS_OPCODE_DDX_COARSE,
FS_OPCODE_DDX_FINE,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d662e15..e0c38e5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -321,6 +321,10 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op)
return "pick_low_32bit";
case VEC4_OPCODE_PICK_HIGH_32BIT:
return "pick_high_32bit";
+ case VEC4_OPCODE_SET_LOW_32BIT:
+ return "set_low_32bit";
+ case VEC4_OPCODE_SET_HIGH_32BIT:
+ return "set_high_32bit";
case FS_OPCODE_DDX_COARSE:
return "ddx_coarse";
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 87a93c9..05109a4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -247,6 +247,8 @@ vec4_instruction::can_do_writemask(const struct brw_device_info *devinfo)
case VEC4_OPCODE_FLOAT_TO_DOUBLE:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
+ case VEC4_OPCODE_SET_LOW_32BIT:
+ case VEC4_OPCODE_SET_HIGH_32BIT:
case VS_OPCODE_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
@@ -502,6 +504,8 @@ vec4_visitor::opt_reduce_swizzle()
case VEC4_OPCODE_DOUBLE_TO_FLOAT:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
+ case VEC4_OPCODE_SET_LOW_32BIT:
+ case VEC4_OPCODE_SET_HIGH_32BIT:
swizzle = brw_swizzle_for_size(4);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 9421ee5..5f48df1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1980,6 +1980,32 @@ generate_code(struct brw_codegen *p,
break;
}
+ case VEC4_OPCODE_SET_LOW_32BIT:
+ case VEC4_OPCODE_SET_HIGH_32BIT: {
+ /* Reads consecutive 32-bit elements from src[0] and writes
+ * them to the low/high 32 bits of each 64-bit element in dst.
+ */
+ assert(type_sz(src[0].type) == 4);
+ assert(type_sz(dst.type) == 8);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ dst = retype(dst, BRW_REGISTER_TYPE_UD);
+ if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
+ dst = get_element_ud(dst, 1);
+ dst.hstride = BRW_HORIZONTAL_STRIDE_2;
+ dst.width = BRW_WIDTH_4;
+
+ src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
+ src[0].vstride = BRW_VERTICAL_STRIDE_4;
+ src[0].width = BRW_WIDTH_4;
+ src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
+ brw_MOV(p, dst, src[0]);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ break;
+ }
+
case VEC4_OPCODE_PACK_BYTES: {
/* Is effectively:
*
--
2.7.4
More information about the mesa-dev
mailing list