<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Jun 1, 2016 at 3:04 PM, Jordan Justen <span dir="ltr">&lt;<a href="mailto:jordan.l.justen@intel.com" target="_blank">jordan.l.justen@intel.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">The cross thread constant support appears on Haswell. It allows us to<br>
upload a set of uniform data for all threads without duplicating it<br>
per thread.<br>
<br>
We also support per-thread data which allows us to store a per-thread<br>
ID in one of the uniforms that can be used to calculate the<br>
gl_LocalInvocationIndex and gl_LocalInvocationID variables.<br>
<br>
v4:<br>
 * Support the old local ID push constant layout as well (Jason)<br>
<br>
Signed-off-by: Jordan Justen &lt;<a href="mailto:jordan.l.justen@intel.com">jordan.l.justen@intel.com</a>&gt;<br>
---<br>
 src/mesa/drivers/dri/i965/brw_defines.h   |  3 +<br>
 src/mesa/drivers/dri/i965/gen7_cs_state.c | 99 +++++++++++++++++--------------<br>
 2 files changed, 56 insertions(+), 46 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h<br>
index 4eb6b1f..e7d1a9f 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_defines.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_defines.h<br>
@@ -2943,6 +2943,9 @@ enum brw_wm_barycentric_interp_mode {<br>
 # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)<br>
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0<br>
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)<br>
+/* GEN7 DW6, GEN8+ DW7 */<br>
+# define CROSS_THREAD_READ_LENGTH_SHIFT         0<br>
+# define CROSS_THREAD_READ_LENGTH_MASK          INTEL_MASK(7, 0)<br>
 #define MEDIA_STATE_FLUSH                       0x7004<br>
 #define GPGPU_WALKER                            0x7105<br>
 /* GEN7 DW0 */<br>
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c<br>
index 619edfb..2fee02d 100644<br>
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c<br>
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c<br>
@@ -42,7 +42,6 @@ brw_upload_cs_state(struct brw_context *brw)<br>
    uint32_t offset;<br>
    uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,<br>
                                                 8 * 4, 64, &offset);<br>
-   struct gl_program *prog = (struct gl_program *) brw->compute_program;<br>
    struct brw_stage_state *stage_state = &brw->cs.base;<br>
    struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;<br>
    struct brw_stage_prog_data *prog_data = &cs_prog_data->base;<br>
@@ -59,16 +58,6 @@ brw_upload_cs_state(struct brw_context *brw)<br>
                                             prog_data->binding_table.size_bytes,<br>
                                             32, &stage_state->bind_bo_offset);<br>
<br>
-   unsigned local_id_dwords = 0;<br>
-<br>
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)<br>
-      local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;<br>
-<br>
-   unsigned push_constant_data_size =<br>
-      (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);<br>
-   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);<br>
-   unsigned push_constant_regs = reg_aligned_constant_size / 32;<br>
-<br>
    uint32_t dwords = brw->gen < 8 ? 8 : 9;<br>
    BEGIN_BATCH(dwords);<br>
    OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));<br>
@@ -118,7 +107,8 @@ brw_upload_cs_state(struct brw_context *brw)<br>
     * Note: The constant data is built in brw_upload_cs_push_constants below.<br>
     */<br>
    const uint32_t vfe_curbe_allocation =<br>
-      push_constant_regs * cs_prog_data->threads;<br>
+      ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +<br>
+            cs_prog_data->push.cross_thread.regs, 2);<br>
    OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |<br>
              SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));<br>
    OUT_BATCH(0);<br>
@@ -126,11 +116,11 @@ brw_upload_cs_state(struct brw_context *brw)<br>
    OUT_BATCH(0);<br>
    ADVANCE_BATCH();<br>
<br>
-   if (reg_aligned_constant_size > 0) {<br>
+   if (cs_prog_data->push.total.size > 0) {<br>
       BEGIN_BATCH(4);<br>
       OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));<br>
       OUT_BATCH(0);<br>
-      OUT_BATCH(ALIGN(reg_aligned_constant_size * cs_prog_data->threads, 64));<br>
+      OUT_BATCH(ALIGN(cs_prog_data->push.total.size, 64));<br>
       OUT_BATCH(stage_state->push_const_offset);<br>
       ADVANCE_BATCH();<br>
    }<br>
@@ -149,7 +139,8 @@ brw_upload_cs_state(struct brw_context *brw)<br>
    desc[dw++] = stage_state->sampler_offset |<br>
       ((stage_state->sampler_count + 3) / 4);<br>
    desc[dw++] = stage_state->bind_bo_offset;<br>
-   desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);<br>
+   desc[dw++] = SET_FIELD(cs_prog_data->push.per_thread.regs,<br>
+                          MEDIA_CURBE_READ_LENGTH);<br>
    const uint32_t media_threads =<br>
       brw->gen >= 8 ?<br>
       SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :<br>
@@ -171,6 +162,10 @@ brw_upload_cs_state(struct brw_context *brw)<br>
       SET_FIELD(slm_size, MEDIA_SHARED_LOCAL_MEMORY_SIZE) |<br>
       media_threads;<br>
<br>
+   desc[dw++] =<br>
+      SET_FIELD(cs_prog_data->push.cross_thread.regs,<br>
+                CROSS_THREAD_READ_LENGTH);<br></blockquote><div><br></div><div>I don't think this needs 3 lines.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
    BEGIN_BATCH(4);<br>
    OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));<br>
    OUT_BATCH(0);<br>
@@ -213,10 +208,6 @@ brw_upload_cs_push_constants(struct brw_context *brw,<br>
    struct gl_context *ctx = &brw->ctx;<br>
    const struct brw_stage_prog_data *prog_data =<br>
       (struct brw_stage_prog_data*) cs_prog_data;<br>
-   unsigned local_id_dwords = 0;<br>
-<br>
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)<br>
-      local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;<br>
<br>
    /* Updates the ParamaterValues[i] pointers for all parameters of the<br>
     * basic type of PROGRAM_STATE_VAR.<br>
@@ -224,41 +215,57 @@ brw_upload_cs_push_constants(struct brw_context *brw,<br>
    /* XXX: Should this happen somewhere before to get our state flag set? */<br>
    _mesa_load_state_parameters(ctx, prog->Parameters);<br>
<br>
-   if (prog_data->nr_params == 0 && local_id_dwords == 0) {<br>
+   if (cs_prog_data->push.total.size == 0) {<br>
       stage_state->push_const_size = 0;<br>
-   } else {<br>
-      gl_constant_value *param;<br>
-      unsigned i, t;<br>
-<br>
-      const unsigned push_constant_data_size =<br>
-         (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);<br>
-      const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);<br>
-      const unsigned param_aligned_count =<br>
-         reg_aligned_constant_size / sizeof(*param);<br>
+      return;<br>
+   }<br>
<br>
-      param = (gl_constant_value*)<br>
-         brw_state_batch(brw, type,<br>
-                         ALIGN(reg_aligned_constant_size *<br>
-                                  cs_prog_data->threads, 64),<br>
-                         64, &stage_state->push_const_offset);<br>
-      assert(param);<br>
<br>
-      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));<br>
+   gl_constant_value *param;<br>
+   param = (gl_constant_value*)<br></blockquote><div><br></div><div>These could go on the same line.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+      brw_state_batch(brw, type,<br>
+                      ALIGN(cs_prog_data->push.total.size, 64),<br>
+                      64, &stage_state->push_const_offset);<br>
+   assert(param);<br>
+<br>
+   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));<br>
+<br>
+   if (cs_prog_data->push.cross_thread.size > 0) {<br>
+      gl_constant_value *param_copy = param;<br>
+      assert(cs_prog_data->thread_local_id_index < 0 ||<br>
+             cs_prog_data->thread_local_id_index >=<br>
+                cs_prog_data->push.cross_thread.dwords);<br>
+      for (unsigned i = 0;<br>
+           i < cs_prog_data->push.cross_thread.dwords;<br>
+           i++) {<br>
+         param_copy[i] = *prog_data->param[i];<br>
+      }<br>
+   }<br>
<br>
+   gl_constant_value thread_id;<br>
+   if (cs_prog_data->push.per_thread.size > 0) {<br>
       brw_cs_fill_local_id_payload(cs_prog_data, param, cs_prog_data->threads,<br>
-                                   reg_aligned_constant_size);<br>
-<br>
-      /* _NEW_PROGRAM_CONSTANTS */<br>
-      for (t = 0; t < cs_prog_data->threads; t++) {<br>
-         gl_constant_value *next_param =<br>
-            &amp;param[t * param_aligned_count + local_id_dwords];<br>
-         for (i = 0; i < prog_data->nr_params; i++) {<br>
-            next_param[i] = *prog_data->param[i];<br>
+                                   cs_prog_data->push.per_thread.size);<br>
+      for (unsigned t = 0; t < cs_prog_data->threads; t++) {<br>
+         unsigned dst =<br>
+            8 * (cs_prog_data->push.per_thread.regs * t +<br>
+                 cs_prog_data->push.cross_thread.regs +<br>
+                 cs_prog_data->local_invocation_id_regs);<br>
+         unsigned src = cs_prog_data->push.cross_thread.dwords;<br>
+         for ( ; src < prog_data->nr_params; src++, dst++) {<br>
+            if (src != cs_prog_data->thread_local_id_index)<br>
+               param[dst] = *prog_data->param[src];<br>
+            else {<br>
+               thread_id.u = t * cs_prog_data->simd_size;<br>
+               param[dst] = thread_id;<br>
+            }<br>
          }<br>
       }<br>
-<br>
-      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;<br>
    }<br>
+<br>
+   stage_state->push_const_size =<br>
+      cs_prog_data->push.cross_thread.regs +<br>
+      cs_prog_data->push.per_thread.regs;<br>
 }<br>
<span class="HOEnZb"><font color="#888888"><br>
<br>
--<br>
2.8.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>