[Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.
Eric Anholt
eric at anholt.net
Thu Apr 4 16:14:42 PDT 2013
On gen7, the pull constant load now sends its message from a GRF. This allows
the computation of the offset to be written directly into the message source.
Improves performance of low-resolution GLB2.7 by 4.6% +/- 1.4% (n=11).
---
src/mesa/drivers/dri/i965/brw_defines.h | 1 +
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++
src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 ++++-
src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 45 +++++++++++++++---------
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++++++---
6 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3d07c36..a13f9dc 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -739,6 +739,7 @@ enum opcode {
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
+ VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
};
#define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1a52039..b3bd1b9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op)
return "scratch_write";
case VS_OPCODE_PULL_CONSTANT_LOAD:
return "pull_constant_load";
+ case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ return "pull_constant_load_gen7";
default:
/* Yes, this leaks. It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c58fb44..1013aae 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -223,7 +223,13 @@ vec4_instruction::is_math()
bool
vec4_instruction::is_send_from_grf()
{
- return opcode == SHADER_OPCODE_SHADER_TIME_ADD;
+ switch (opcode) {
+ case SHADER_OPCODE_SHADER_TIME_ADD:
+ case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ return true;
+ default:
+ return false;
+ }
}
bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 8f130e1..e286925 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -544,6 +544,10 @@ private:
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
+ void generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset);
struct brw_context *brw;
struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e378f7f..963901c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg index,
struct brw_reg offset)
{
+ assert(intel->gen <= 7);
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
- if (intel->gen == 7) {
- gen6_resolve_implied_move(p, &offset, inst->base_mrf);
- brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(p, insn, dst);
- brw_set_src0(p, insn, offset);
- brw_set_sampler_message(p, insn,
- surf_index,
- 0, /* LD message ignores sampler unit */
- GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
- 1, /* rlen */
- 1, /* mlen */
- false, /* no header */
- BRW_SAMPLER_SIMD_MODE_SIMD4X2,
- 0);
- return;
- }
-
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
@@ -614,6 +598,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
}
void
+vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
+{
+ assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD);
+
+ brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, insn, dst);
+ brw_set_src0(p, insn, offset);
+ brw_set_sampler_message(p, insn,
+ surf_index.dw1.ud,
+ 0, /* LD message ignores sampler unit */
+ GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
+ 1, /* rlen */
+ 1, /* mlen */
+ false, /* no header */
+ BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+ 0);
+}
+
+void
vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
struct brw_reg dst,
struct brw_reg *src)
@@ -673,6 +680,10 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
generate_pull_constant_load(inst, dst, src[0], src[1]);
break;
+ case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ break;
+
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ce07381..3927161 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2882,10 +2882,20 @@ vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
vec4_instruction *load;
- load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
- temp, index, offset);
- load->base_mrf = 14;
- load->mlen = 1;
+ if (intel->gen >= 7) {
+ dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+ grf_offset.type = offset.type;
+ emit_before(inst, MOV(grf_offset, offset));
+
+ load = new(mem_ctx) vec4_instruction(this,
+ VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ temp, index, src_reg(grf_offset));
+ } else {
+ load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+ temp, index, offset);
+ load->base_mrf = 14;
+ load->mlen = 1;
+ }
emit_before(inst, load);
}
--
1.7.10.4
More information about the mesa-dev mailing list