[Mesa-dev] [PATCH 06/10] i965/vec4: Optimize unpackSnorm4x8().
Matt Turner
mattst88 at gmail.com
Thu Oct 23 16:44:08 PDT 2014
Reduces the number of instructions needed to implement unpackSnorm4x8()
from 16 to 6.
---
src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 32 +++++++++++++++++++++++++-
3 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 5af5515..375e64b 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -92,11 +92,11 @@ brw_lower_packing_builtins(struct brw_context *brw,
| LOWER_PACK_UNORM_2x16
| LOWER_UNPACK_UNORM_2x16
| LOWER_PACK_SNORM_4x8
- | LOWER_UNPACK_SNORM_4x8
| LOWER_PACK_UNORM_4x8;
if (shader_type == MESA_SHADER_FRAGMENT) {
- ops |= LOWER_UNPACK_UNORM_4x8;
+ ops |= LOWER_UNPACK_UNORM_4x8
+ | LOWER_UNPACK_SNORM_4x8;
}
if (brw->gen >= 7) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 116ca06..28e23a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -533,6 +533,7 @@ public:
void emit_pack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
+ void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2b7bc07..5f7859b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -573,6 +573,34 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
}
void
+vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
+{
+ /* Instead of splitting the 32-bit integer, shifting, and ORing it back
+ * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
+ * is not suitable to generate the shift values, but we can use the packed
+ * vector float and a type-converting MOV.
+ */
+ uint8_t vf[4] = {0x0, 0x60, 0x70, 0x78};
+ dst_reg shift(this, glsl_type::uvec4_type);
+ emit(MOV(shift, src_reg(vf)));
+
+ dst_reg shifted(this, glsl_type::uvec4_type);
+ src0.swizzle = BRW_SWIZZLE_XXXX;
+ emit(SHR(shifted, src0, src_reg(shift)));
+
+ shifted.type = BRW_REGISTER_TYPE_B;
+ dst_reg f(this, glsl_type::vec4_type);
+ emit(MOV(f, src_reg(shifted)));
+
+ dst_reg scaled(this, glsl_type::vec4_type);
+ emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
+
+ dst_reg max(this, glsl_type::vec4_type);
+ emit_minmax(BRW_CONDITIONAL_G, max, src_reg(scaled), src_reg(-1.0f));
+ emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
+}
+
+void
vec4_visitor::visit_instructions(const exec_list *list)
{
foreach_in_list(ir_instruction, ir, list) {
@@ -1856,12 +1884,14 @@ vec4_visitor::visit(ir_expression *ir)
case ir_unop_unpack_unorm_4x8:
emit_unpack_unorm_4x8(result_dst, op[0]);
break;
+ case ir_unop_unpack_snorm_4x8:
+ emit_unpack_snorm_4x8(result_dst, op[0]);
+ break;
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_unorm_4x8:
case ir_unop_unpack_snorm_2x16:
- case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_2x16:
unreachable("not reached: should be handled by lower_packing_builtins");
case ir_unop_unpack_half_2x16_split_x:
--
2.0.4
More information about the mesa-dev mailing list