Mesa (master): iris: Stop using cs_prog_data->threads

Fri Apr 10 03:30:55 UTC 2020

Module: Mesa
Branch: master
Commit: 9ff55621d9c9d299ac8e4eb2fcfe6db8a7a1b2a1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ff55621d9c9d299ac8e4eb2fcfe6db8a7a1b2a1

Author: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
Date:   Thu Apr  9 16:54:25 2020 -0700

iris: Stop using cs_prog_data->threads

This is a preparation for dropping this field since this value is
expected to be calculated by the drivers now for variable group size
case.  And also the field would get in the way of brw_compile_cs
producing multiple SIMD variants (like FS).

Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4504>

---

 src/gallium/drivers/iris/iris_context.h |  1 +
 src/gallium/drivers/iris/iris_program.c |  5 +++--
 src/gallium/drivers/iris/iris_state.c   | 14 ++++++++------
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index e6a7af1732c..d1952ed27e4 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -847,6 +847,7 @@ void iris_init_perfquery_functions(struct pipe_context *ctx);
 void iris_update_compiled_shaders(struct iris_context *ice);
 void iris_update_compiled_compute_shader(struct iris_context *ice);
 void iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+                                    unsigned threads,
                                     uint32_t *dst);
 
 
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 3e8abcd428d..c876540cbbb 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -2020,13 +2020,14 @@ iris_update_compiled_compute_shader(struct iris_context *ice)
 
 void
 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+                               unsigned threads,
                                uint32_t *dst)
 {
-   assert(brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads) > 0);
+   assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
    assert(cs_prog_data->push.cross_thread.size == 0);
    assert(cs_prog_data->push.per_thread.dwords == 1);
    assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
-   for (unsigned t = 0; t < cs_prog_data->threads; t++)
+   for (unsigned t = 0; t < threads; t++)
       dst[8 * t] = t;
 }
 
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 5ea1cd8a2f1..f572c0213dd 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4365,7 +4365,6 @@ iris_store_cs_state(struct iris_context *ice,
    iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
       desc.KernelStartPointer = KSP(shader);
       desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
-      desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads;
       desc.SharedLocalMemorySize =
          encode_slm_size(GEN_GEN, prog_data->total_shared);
       desc.BarrierEnable = cs_prog_data->uses_barrier;
@@ -6471,6 +6470,9 @@ iris_upload_compute_state(struct iris_context *ice,
    struct brw_stage_prog_data *prog_data = shader->prog_data;
    struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
 
+   const uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
+   const unsigned threads = DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
+
    /* Always pin the binder.  If we're emitting new binding table pointers,
     * we need it.  If not, we're probably inheriting old tables via the
     * context, and need it anyway.  Since true zero-bindings cases are
@@ -6532,7 +6534,7 @@ iris_upload_compute_state(struct iris_context *ice,
          vfe.URBEntryAllocationSize = 2;
 
          vfe.CURBEAllocationSize =
-            ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+            ALIGN(cs_prog_data->push.per_thread.regs * threads +
                   cs_prog_data->push.cross_thread.regs, 2);
       }
    }
@@ -6544,7 +6546,7 @@ iris_upload_compute_state(struct iris_context *ice,
              cs_prog_data->push.per_thread.dwords == 1 &&
              cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
       const unsigned push_const_size =
-         brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+         brw_cs_push_const_total_size(cs_prog_data, threads);
       uint32_t *curbe_data_map =
          stream_state(batch, ice->state.dynamic_uploader,
                       &ice->state.last_res.cs_thread_ids,
@@ -6552,7 +6554,7 @@ iris_upload_compute_state(struct iris_context *ice,
                       &curbe_data_offset);
       assert(curbe_data_map);
       memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
-      iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map);
+      iris_fill_cs_push_const_buffer(cs_prog_data, threads, curbe_data_map);
 
       iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
          curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
@@ -6569,6 +6571,7 @@ iris_upload_compute_state(struct iris_context *ice,
       iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
          idd.SamplerStatePointer = shs->sampler_table.offset;
          idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
+         idd.NumberofThreadsinGPGPUThreadGroup = threads;
       }
 
       for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
@@ -6583,7 +6586,6 @@ iris_upload_compute_state(struct iris_context *ice,
       }
    }
 
-   uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
    uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
    uint32_t right_mask;
 
@@ -6618,7 +6620,7 @@ iris_upload_compute_state(struct iris_context *ice,
       ggw.SIMDSize                   = cs_prog_data->simd_size / 16;
       ggw.ThreadDepthCounterMaximum  = 0;
       ggw.ThreadHeightCounterMaximum = 0;
-      ggw.ThreadWidthCounterMaximum  = cs_prog_data->threads - 1;
+      ggw.ThreadWidthCounterMaximum  = threads - 1;
       ggw.ThreadGroupIDXDimension    = grid->grid[0];
       ggw.ThreadGroupIDYDimension    = grid->grid[1];
       ggw.ThreadGroupIDZDimension    = grid->grid[2];