<div dir="ltr">On 2 December 2013 11:39, Francisco Jerez <span dir="ltr">&lt;<a href="mailto:currojerez@riseup.net" target="_blank">currojerez@riseup.net</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">---<br>
src/mesa/drivers/dri/i965/brw_defines.h | 1 +<br>
src/mesa/drivers/dri/i965/brw_eu.h | 8 ++++<br>
src/mesa/drivers/dri/i965/brw_eu_emit.c | 49 ++++++++++++++++++++++<br>
src/mesa/drivers/dri/i965/brw_fs.cpp | 1 +<br>
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 6 +++<br>
.../drivers/dri/i965/brw_schedule_instructions.cpp | 1 +<br>
src/mesa/drivers/dri/i965/brw_shader.cpp | 1 +<br>
src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 +<br>
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 6 +++<br>
9 files changed, 74 insertions(+)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h<br>
index 67a2aaa..988b07e 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_defines.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_defines.h<br>
@@ -778,6 +778,7 @@ enum opcode {<br>
<br>
SHADER_OPCODE_UNTYPED_ATOMIC,<br>
SHADER_OPCODE_UNTYPED_SURFACE_READ,<br>
+ SHADER_OPCODE_UNTYPED_SURFACE_WRITE,<br>
<br>
SHADER_OPCODE_GEN4_SCRATCH_READ,<br>
SHADER_OPCODE_GEN4_SCRATCH_WRITE,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h<br>
index 45b421b..e17dc49 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu.h<br>
@@ -375,6 +375,14 @@ brw_untyped_surface_read(struct brw_compile *p,<br>
unsigned msg_length,<br>
unsigned num_channels);<br>
<br>
+void<br>
+brw_untyped_surface_write(struct brw_compile *p,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg mrf,<br>
+ struct brw_reg surface,<br>
+ unsigned msg_length,<br>
+ unsigned num_channels);<br>
+<br>
/***********************************************************************<br>
* brw_eu_util.c:<br>
*/<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
index b94a6d1..13dd59a 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
@@ -2704,6 +2704,55 @@ brw_untyped_surface_read(struct brw_compile *p,<br>
brw_send_indirect_message(p, sfid, dst, mrf, desc);<br>
}<br>
<br>
+static void<br>
+brw_set_dp_untyped_surface_write_message(struct brw_compile *p,<br>
+ struct brw_instruction *insn,<br>
+ unsigned num_channels)<br>
+{<br>
+ insn->bits3.gen7_dp.msg_type = (p->brw->is_haswell ?<br>
+ HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :<br>
+ GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE);<br>
+<br>
+ if (p->current->header.access_mode == BRW_ALIGN_1) {<br>
+ if (p->compressed)<br>
+ insn->bits3.ud |= 1 << 12; /* SIMD16 mode */<br>
+ else<br>
+ insn->bits3.ud |= 2 << 12; /* SIMD8 mode */<br>
+ } else {<br>
+ if (p->brw->is_haswell)<br>
+ insn->bits3.ud |= 2 << 12; /* SIMD4x2 mode */<br></blockquote><div><br></div><div>This looks like a mistake. Did you mean "|= 0 << 12;"?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ else<br>
+ insn->bits3.ud |= 2 << 12; /* SIMD8 mode */<br></blockquote><div><br></div><div>I remember when we were discussing the atomic operations, you had an argument for why it was safe to use SIMD8 mode on IVB when compiling a SIMD4x2 shader. Does that argument still apply here? Can you recap it in a comment, please?<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ }<br>
+<br>
+ /* Set mask of 32-bit channels to drop. */<br>
+ insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;<br>
+}<br>
+<br>
+void<br>
+brw_untyped_surface_write(struct brw_compile *p,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg mrf,<br>
+ struct brw_reg surface,<br>
+ unsigned msg_length,<br>
+ unsigned num_channels)<br>
+{<br>
+ const unsigned sfid = (p->brw->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 :<br>
+ GEN7_SFID_DATAPORT_DATA_CACHE);<br>
+ const bool header_present = p->current->header.access_mode == BRW_ALIGN_1;<br>
+ struct brw_reg desc = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);<br>
+ struct brw_instruction *insn;<br>
+<br>
+ insn = brw_load_indirect_message_descriptor(<br>
+ p, desc, surface, msg_length, 0,<br>
+ header_present);<br>
+<br>
+ brw_set_dp_untyped_surface_write_message(<br>
+ p, insn, num_channels);<br>
+<br>
+ brw_send_indirect_message(p, sfid, dst, mrf, desc);<br>
+}<br>
+<br>
/**<br>
* This instruction is generated as a single-channel align1 instruction by<br>
* both the VS and FS stages when using INTEL_DEBUG=shader_time.<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index 4408cbe..721162f 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -781,6 +781,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)<br>
return 2;<br>
case SHADER_OPCODE_UNTYPED_ATOMIC:<br>
case SHADER_OPCODE_UNTYPED_SURFACE_READ:<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
return 0;<br>
default:<br>
assert(!"not reached");<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
index 0d50051..2ebb90a 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
@@ -1690,6 +1690,12 @@ fs_generator::generate_code(exec_list *instructions)<br>
src[0], inst->mlen, src[1].dw1.ud);<br>
break;<br>
<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
+ assert(src[1].file == BRW_IMMEDIATE_VALUE);<br>
+ brw_untyped_surface_write(p, dst, brw_message_reg(inst->base_mrf),<br>
+ src[0], inst->mlen, src[1].dw1.ud);<br>
+ break;<br>
+<br></blockquote><div><br></div><div>As in the previous patch, I'm concerned that brw_mark_surface_used() isn't being called.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
case FS_OPCODE_SET_SIMD4X2_OFFSET:<br>
generate_set_simd4x2_offset(inst, dst, src[0]);<br>
break;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp<br>
index baf67fb..39b63bc 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp<br>
@@ -356,6 +356,7 @@ schedule_node::set_latency_gen7(bool is_haswell)<br>
break;<br>
<br>
case SHADER_OPCODE_UNTYPED_SURFACE_READ:<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
/* Test code:<br>
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };<br>
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };<br></blockquote><div><br></div><div>Surface write has the same latency as surface read? From a scheduling perspective I would expect the latency of surface write to be very small, since there is no response message (and therefore there is nothing to stop the EU from dispatching additional instructions after the SEND).<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
index 128354a..2824515 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
@@ -659,6 +659,7 @@ bool<br>
backend_instruction::has_side_effects() const<br>
{<br>
switch (opcode) {<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
case SHADER_OPCODE_UNTYPED_ATOMIC:<br>
return true;<br>
default:<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp<br>
index 9d5d26f..2bf2c88 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp<br>
@@ -293,6 +293,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)<br>
return inst->header_present ? 1 : 0;<br>
case SHADER_OPCODE_UNTYPED_ATOMIC:<br>
case SHADER_OPCODE_UNTYPED_SURFACE_READ:<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
return 0;<br>
default:<br>
assert(!"not reached");<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp<br>
index d29c3dd..f06282c 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp<br>
@@ -1172,6 +1172,12 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,<br>
src[0], inst->mlen, src[1].dw1.ud);<br>
break;<br>
<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
+ assert(src[1].file == BRW_IMMEDIATE_VALUE);<br>
+ brw_untyped_surface_write(p, dst, brw_message_reg(inst->base_mrf),<br>
+ src[0], inst->mlen, src[1].dw1.ud);<br>
+ break;<br>
+<br></blockquote><div><br></div><div>Same comment about brw_mark_surface_used() applies here.<br><br></div><div>With those issues fixed, this patch is:<br><br>Reviewed-by: Paul Berry &lt;<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>&gt;<br>
</div></div></div></div>