<div dir="ltr">On 21 October 2013 17:48, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
This avoids a lot of message setup we had to do otherwise. Improves<br>
GLB2.7 performance with register spilling force enabled by 1.6442% +/-<br>
0.553218% (n=4).<br>
---<br>
src/mesa/drivers/dri/i965/brw_defines.h | 7 ++++<br>
src/mesa/drivers/dri/i965/brw_eu.h | 5 +++<br>
src/mesa/drivers/dri/i965/brw_eu_emit.c | 41 ++++++++++++++++++++++<br>
src/mesa/drivers/dri/i965/brw_fs.h | 1 +<br>
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 ++++++<br>
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 21 +++++++----<br>
.../drivers/dri/i965/brw_schedule_instructions.cpp | 12 +++++++<br>
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++<br>
8 files changed, 93 insertions(+), 6 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h<br>
index 72a0891..276ab44 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_defines.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_defines.h<br>
@@ -776,6 +776,7 @@ enum opcode {<br>
<br>
SHADER_OPCODE_GEN4_SCRATCH_READ,<br>
SHADER_OPCODE_GEN4_SCRATCH_WRITE,<br>
+ SHADER_OPCODE_GEN7_SCRATCH_READ,<br>
<br>
FS_OPCODE_DDX,<br>
FS_OPCODE_DDY,<br>
@@ -1135,6 +1136,12 @@ enum brw_message_target {<br>
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE 12<br>
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE 13<br>
<br>
+#define GEN7_DATAPORT_SCRATCH_READ ((1 << 18) | \<br>
+ (0 << 17))<br>
+#define GEN7_DATAPORT_SCRATCH_WRITE ((1 << 18) | \<br>
+ (1 << 17))<br>
+#define GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT 12<br>
+<br>
/* HSW */<br>
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ 0<br>
#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ 1<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h<br>
index 072310d..a307948 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu.h<br>
@@ -379,6 +379,11 @@ void brw_oword_block_write_scratch(struct brw_compile *p,<br>
int num_regs,<br>
GLuint offset);<br>
<br>
+void gen7_block_read_scratch(struct brw_compile *p,<br>
+ struct brw_reg dest,<br>
+ int num_regs,<br>
+ GLuint offset);<br>
+<br>
void brw_shader_time_add(struct brw_compile *p,<br>
struct brw_reg payload,<br>
uint32_t surf_index);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
index 8efd679..accf324 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
@@ -2055,6 +2055,47 @@ brw_oword_block_read_scratch(struct brw_compile *p,<br>
}<br>
}<br>
<br>
+void<br>
+gen7_block_read_scratch(struct brw_compile *p,<br>
+ struct brw_reg dest,<br>
+ int num_regs,<br>
+ GLuint offset)<br>
+{<br>
+ dest = retype(dest, BRW_REGISTER_TYPE_UW);<br>
+<br>
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);<br>
+<br>
+ assert(insn->header.predicate_control == 0);<br></blockquote><div><br></div><div>Can we please use BRW_PREDICATE_NONE instead of 0 here?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ insn->header.compression_control = BRW_COMPRESSION_NONE;<br>
+<br>
+ brw_set_dest(p, insn, dest);<br>
+<br>
+ /* The HW requires that the header is present; this is to get the g0.5<br>
+ * scratch offset.<br>
+ */<br>
+ bool header_present = true;<br>
+ brw_set_src0(p, insn, brw_vec8_grf(0, 0));<br>
+<br>
+ brw_set_message_descriptor(p, insn,<br>
+ GEN7_SFID_DATAPORT_DATA_CACHE,<br>
+ 1, /* mlen: just g0 */<br>
+ num_regs,<br>
+ header_present,<br>
+ false);<br>
+<br>
+ insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;<br>
+<br>
+ assert(num_regs == 1 || num_regs == 2 || num_regs == 4);<br>
+ insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;<br>
+<br>
+ /* The "HWORD" unit in the docs just happens to mean "the size of a<br>
+ * register"<br>
+ */<br></blockquote><div><br></div><div>I had to search the docs for a while to understand what this comment was referring to. How about saying something like this instead?<br><br> /* According to the docs, offset is "A 12-bit HWord offset into the memory<br>
* Immediate Memory buffer as specified by binding table 0xFF." An HWORD<br> * is 32 bytes, which happens to be the size of a register.<br> */<br><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ offset /= REG_SIZE;<br>
+ assert(offset < (1 << 12));<br>
+ insn->bits3.ud |= offset;<br>
+}<br>
+<br>
/**<br>
* Read a float[4] vector from the data port Data Cache (const buffer).<br>
* Location (in buffer) should be a multiple of 16.<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index f9c87c7..432f3df 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -521,6 +521,7 @@ private:<br>
bool negate_value);<br>
void generate_scratch_write(fs_inst *inst, struct brw_reg src);<br>
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);<br>
+ void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);<br>
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,<br>
struct brw_reg index,<br>
struct brw_reg offset);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
index 6aebc41..fa8bccd 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
@@ -732,6 +732,12 @@ fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst)<br>
}<br>
<br>
void<br>
+fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)<br>
+{<br>
+ gen7_block_read_scratch(p, dst, dispatch_width / 8, inst->offset);<br>
+}<br>
+<br>
+void<br>
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,<br>
struct brw_reg dst,<br>
struct brw_reg index,<br>
@@ -1517,6 +1523,10 @@ fs_generator::generate_code(exec_list *instructions)<br>
generate_scratch_read(inst, dst);<br>
break;<br>
<br>
+ case SHADER_OPCODE_GEN7_SCRATCH_READ:<br>
+ generate_scratch_read_gen7(inst, dst);<br>
+ break;<br>
+<br>
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:<br>
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);<br>
break;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
index 75090a6..a78c1af 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
@@ -527,17 +527,25 @@ fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset,<br>
int count)<br>
{<br>
for (int i = 0; i < count; i++) {<br>
+ /* The gen7 descriptor-based offset is 12 bits of HWORD units. */<br>
+ bool gen7_read = brw->gen >= 7 && spill_offset < (1 << 12) * REG_SIZE;<br>
+<br>
fs_inst *unspill_inst =<br>
- new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_READ, dst);<br>
+ new(mem_ctx) fs_inst(gen7_read ?<br>
+ SHADER_OPCODE_GEN7_SCRATCH_READ :<br>
+ SHADER_OPCODE_GEN4_SCRATCH_READ,<br>
+ dst);<br>
unspill_inst->offset = spill_offset;<br>
unspill_inst->ir = inst->ir;<br>
unspill_inst->annotation = inst->annotation;<br>
<br>
- /* Choose a MRF that won't conflict with an MRF that's live across the<br>
- * spill. Nothing else will make it up to MRF 14/15.<br>
- */<br>
- unspill_inst->base_mrf = 14;<br>
- unspill_inst->mlen = 1; /* header contains offset */<br>
+ if (!gen7_read) {<br>
+ /* Choose a MRF that won't conflict with an MRF that's live across<br>
+ * the spill. Nothing else will make it up to MRF 14/15.<br>
+ */<br>
+ unspill_inst->base_mrf = 14;<br>
+ unspill_inst->mlen = 1; /* header contains offset */<br>
+ }<br>
inst->insert_before(unspill_inst);<br>
<br>
dst.reg_offset++;<br>
@@ -605,6 +613,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)<br>
break;<br>
<br>
case SHADER_OPCODE_GEN4_SCRATCH_READ:<br>
+ case SHADER_OPCODE_GEN7_SCRATCH_READ:<br>
if (inst->dst.file == GRF)<br>
no_spill[inst->dst.reg] = true;<br>
break;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp<br>
index 9a480b4..a4e0be3 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp<br>
@@ -329,6 +329,18 @@ schedule_node::set_latency_gen7(bool is_haswell)<br>
latency = 200;<br>
break;<br>
<br>
+ case SHADER_OPCODE_GEN7_SCRATCH_READ:<br>
+ /* Testing a load from offset 0, that had been previously written:<br>
+ *<br>
+ * send(8) g114<1>UW g0<8,8,1>F data (0, 0, 0) mlen 1 rlen 1 { align1 WE_normal 1Q };<br>
+ * mov(8) null g114<8,8,1>F { align1 WE_normal 1Q };<br>
+ *<br>
+ * The cycles spent seemed to be grouped around 40-50 (as low as 38),<br>
+ * then around 140. Presumably this is cache hit vs miss.<br>
+ */<br>
+ latency = 50;<br>
+ break;<br>
+<br>
default:<br>
/* 2 cycles:<br>
* mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q };<br>
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
index 19421ba..81930ae 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
@@ -442,6 +442,8 @@ brw_instruction_name(enum opcode op)<br>
return "gen4_scratch_read";<br>
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:<br>
return "gen4_scratch_write";<br>
+ case SHADER_OPCODE_GEN7_SCRATCH_READ:<br>
+ return "gen7_scratch_read";<br>
<br>
case FS_OPCODE_DDX:<br>
return "ddx";<br>
<span class=""><font color="#888888">--<br>
1.8.4.rc3<br></font></span></blockquote><div><br></div><div>With those changes, this patch is:<br><br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br><br></div><div>
I sent a comment on patch 2 earlier. The remainder are:<br><br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br></div></div></div></div>