<div class="gmail_quote">On 19 September 2012 13:27, Kenneth Graunke <span dir="ltr">&lt;<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Data port reads are absurdly slow on Ivybridge due to cache issues.<br>
<br>
The LD message ignores the sampler unit index and SAMPLER_STATE pointer,<br>
instead relying on hard-wired default state.  Thus, there's no need to<br>
worry about running out of sampler units or providing SAMPLER_STATE;<br>
this small patch should be all that's required.<br>
<br>
NOTE: This is a candidate for all release branches.<br></blockquote><div><br></div><div>Given that this affects only performance and not correctness, I'm having trouble convincing myself that this patch should be a candidate for release branches.  Don't we usually try to restrict release cherry-picks to things like rendering issues and avoiding GPU hangs?</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
Signed-off-by: Kenneth Graunke &lt;<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>&gt;<br>
---<br>
 src/mesa/drivers/dri/i965/brw_fs.h        |  3 +++<br>
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 36 ++++++++++++++++++++++++++++++-<br>
 2 files changed, 38 insertions(+), 1 deletion(-)<br>
<br>
I did this a long time ago for VS pull constant loading, which resulted in<br>
a 2-5x speedup for certain benchmarks.  Apparently at the time I never got<br>
FS pull constant loading working, and didn't have a benchmark that needed<br>
it, so I never finished and pushed it.<br>
<br>
Now I have a game that needs it.  No concrete data as I haven't figured out<br>
how to get consistent FPS numbers out of it.<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index e69de31..b5f2152 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -295,6 +295,9 @@ public:<br>
    void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,<br>
                                    struct brw_reg index,<br>
                                    struct brw_reg offset);<br>
+   void gen7_generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,<br>
+                                         struct brw_reg index,<br>
+                                         struct brw_reg offset);<br>
    void generate_mov_dispatch_to_flags();<br>
<br>
    void emit_dummy_fs();<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
index 5900c0e..4059660 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
@@ -585,6 +585,37 @@ fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)<br>
 }<br>
<br>
 void<br>
+fs_visitor::gen7_generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,<br>
+                                             struct brw_reg index,<br>
+                                             struct brw_reg offset)<br>
+{<br>
+   assert(intel->gen == 7);<br>
+   assert(index.file == BRW_IMMEDIATE_VALUE &&<br>
+         index.type == BRW_REGISTER_TYPE_UD);<br>
+   assert(offset.file == BRW_IMMEDIATE_VALUE &&<br>
+         offset.type == BRW_REGISTER_TYPE_UD);<br>
+   uint32_t surf_index = index.dw1.ud;<br>
+   uint32_t read_offset = offset.dw1.ud;<br>
+<br>
+   /* offset is an IMM; SEND needs to be from a GRF. */<br>
+   offset = retype(brw_vec8_grf(127, 0), BRW_REGISTER_TYPE_UD);<br>
+   brw_MOV(p, offset, brw_imm_ud(read_offset / 16));<br>
+<br>
+   brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);<br>
+   brw_set_dest(p, insn, dst);<br>
+   brw_set_src0(p, insn, offset);<br>
+   brw_set_sampler_message(p, insn,<br>
+                           surf_index,<br>
+                           0, /* LD message ignores sampler unit */<br>
+                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,<br>
+                           1, /* rlen */<br>
+                           1, /* mlen */<br>
+                           false, /* no header */<br>
+                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,<br>
+                           0);<br>
+}<br>
+<br>
+void<br>
 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,<br>
                                        struct brw_reg index,<br>
                                        struct brw_reg offset)<br>
@@ -980,7 +1011,10 @@ fs_visitor::generate_code()<br>
         break;<br>
<br>
       case FS_OPCODE_PULL_CONSTANT_LOAD:<br>
-        generate_pull_constant_load(inst, dst, src[0], src[1]);<br>
+        if (intel->gen == 7)<br>
+           gen7_generate_pull_constant_load(inst, dst, src[0], src[1]);<br>
+        else<br>
+           generate_pull_constant_load(inst, dst, src[0], src[1]);<br>
         break;<br>
<br>
       case FS_OPCODE_FB_WRITE:<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.11.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br>