<div dir="ltr">Looks good to me :)<div><br></div><div>Reviewed-by: Plamena Manolova &lt;<a href="mailto:plamena.manolova@intel.com">plamena.manolova@intel.com</a>&gt;</div></div><br><div class="gmail_quote"><div dir="ltr">On Fri, Nov 16, 2018 at 7:02 AM Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">It's not at all intel-specific; the formula is dictated by OpenGL and<br>
Vulkan.  The only intel-specific thing is that we need the lowering.  As<br>
a nice side-effect, the new version is variable-group-size ready.<br>
<br>
Cc: Plamena Manolova &lt;<a href="mailto:plamena.n.manolova@gmail.com" target="_blank">plamena.n.manolova@gmail.com</a>&gt;<br>
---<br>
 src/compiler/nir/nir.h                        |  1 +<br>
 src/compiler/nir/nir_lower_system_values.c    | 49 ++++++++++++++++++-<br>
 src/intel/compiler/brw_compiler.c             |  1 +<br>
 .../compiler/brw_nir_lower_cs_intrinsics.c    | 33 -------------<br>
 4 files changed, 50 insertions(+), 34 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index b0cff50eaf2..1dd605010f6 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options {<br>
    bool lower_helper_invocation;<br>
<br>
    bool lower_cs_local_index_from_id;<br>
+   bool lower_cs_local_id_from_index;<br>
<br>
    bool lower_device_index_to_zero;<br>
<br>
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c<br>
index fbc40573579..08a9e8be44a 100644<br>
--- a/src/compiler/nir/nir_lower_system_values.c<br>
+++ b/src/compiler/nir/nir_lower_system_values.c<br>
@@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b)<br>
    return local_size;<br>
 }<br>
<br>
+static nir_ssa_def *<br>
+build_local_invocation_id(nir_builder *b)<br>
+{<br>
+   if (b->shader->options->lower_cs_local_id_from_index) {<br>
+      /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based<br>
+       * on this formula:<br>
+       *<br>
+       *    gl_LocalInvocationID.x =<br>
+       *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;<br>
+       *    gl_LocalInvocationID.y =<br>
+       *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %<br>
+       *       gl_WorkGroupSize.y;<br>
+       *    gl_LocalInvocationID.z =<br>
+       *       (gl_LocalInvocationIndex /<br>
+       *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %<br>
+       *       gl_WorkGroupSize.z;<br>
+       *<br>
+       * However, the final % gl_WorkGroupSize.z does nothing unless we<br>
+       * accidentally end up with a gl_LocalInvocationIndex that is too<br>
+       * large so it can safely be omitted.<br>
+       */<br>
+      nir_ssa_def *local_index = nir_load_local_invocation_index(b);<br>
+      nir_ssa_def *local_size = build_local_group_size(b);<br>
+<br>
+      nir_ssa_def *id_x, *id_y, *id_z;<br>
+      id_x = nir_umod(b, local_index,<br>
+                         nir_channel(b, local_size, 0));<br>
+      id_y = nir_umod(b, nir_udiv(b, local_index,<br>
+                                     nir_channel(b, local_size, 0)),<br>
+                         nir_channel(b, local_size, 1));<br>
+      id_z = nir_udiv(b, local_index,<br>
+                         nir_imul(b, nir_channel(b, local_size, 0),<br>
+                                     nir_channel(b, local_size, 1)));<br>
+      return nir_vec3(b, id_x, id_y, id_z);<br>
+   } else {<br>
+      return nir_load_local_invocation_id(b);<br>
+   }<br>
+}<br>
+<br>
 static bool<br>
 convert_block(nir_block *block, nir_builder *b)<br>
 {<br>
@@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
           */<br>
          nir_ssa_def *group_size = build_local_group_size(b);<br>
          nir_ssa_def *group_id = nir_load_work_group_id(b);<br>
-         nir_ssa_def *local_id = nir_load_local_invocation_id(b);<br>
+         nir_ssa_def *local_id = build_local_invocation_id(b);<br>
<br>
          sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);<br>
          break;<br>
@@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b)<br>
          break;<br>
       }<br>
<br>
+      case SYSTEM_VALUE_LOCAL_INVOCATION_ID:<br>
+         /* If lower_cs_local_id_from_index is true, then we derive the local<br>
+          * id from the local index.<br>
+          */<br>
+         if (b->shader->options->lower_cs_local_id_from_index)<br>
+            sysval = build_local_invocation_id(b);<br>
+         break;<br>
+<br>
       case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {<br>
          sysval = build_local_group_size(b);<br>
          break;<br>
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c<br>
index e863b08b991..fe632c5badc 100644<br>
--- a/src/intel/compiler/brw_compiler.c<br>
+++ b/src/intel/compiler/brw_compiler.c<br>
@@ -42,6 +42,7 @@<br>
    .lower_fdiv = true,                                                        \<br>
    .lower_flrp64 = true,                                                      \<br>
    .lower_ldexp = true,                                                       \<br>
+   .lower_cs_local_id_from_index = true,                                      \<br>
    .lower_device_index_to_zero = true,                                        \<br>
    .native_integers = true,                                                   \<br>
    .use_interpolated_input_intrinsics = true,                                 \<br>
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
index bfbdea0e8fa..fab5edc893f 100644<br>
--- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
+++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
@@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,<br>
          break;<br>
       }<br>
<br>
-      case nir_intrinsic_load_local_invocation_id: {<br>
-         /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based<br>
-          * on this formula:<br>
-          *<br>
-          *    gl_LocalInvocationID.x =<br>
-          *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;<br>
-          *    gl_LocalInvocationID.y =<br>
-          *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %<br>
-          *       gl_WorkGroupSize.y;<br>
-          *    gl_LocalInvocationID.z =<br>
-          *       (gl_LocalInvocationIndex /<br>
-          *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %<br>
-          *       gl_WorkGroupSize.z;<br>
-          */<br>
-         unsigned *size = nir->info.cs.local_size;<br>
-<br>
-         nir_ssa_def *local_index = nir_load_local_invocation_index(b);<br>
-<br>
-         nir_const_value uvec3;<br>
-         memset(&uvec3, 0, sizeof(uvec3));<br>
-         uvec3.u32[0] = 1;<br>
-         uvec3.u32[1] = size[0];<br>
-         uvec3.u32[2] = size[0] * size[1];<br>
-         nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);<br>
-         uvec3.u32[0] = size[0];<br>
-         uvec3.u32[1] = size[1];<br>
-         uvec3.u32[2] = size[2];<br>
-         nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);<br>
-<br>
-         sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);<br>
-         break;<br>
-      }<br>
-<br>
       case nir_intrinsic_load_subgroup_id:<br>
          if (state->local_workgroup_size > 8)<br>
             continue;<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>