<div dir="ltr">Looks good to me :)<div><br></div><div>Reviewed-by: Plamena Manolova &lt;<a href="mailto:plamena.manolova@intel.com">plamena.manolova@intel.com</a>&gt;</div></div><br><div class="gmail_quote"><div dir="ltr">On Fri, Nov 16, 2018 at 7:02 AM Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">It's not at all intel-specific; the formula is dictated by OpenGL and<br>
Vulkan. The only intel-specific thing is that we need the lowering. As<br>
a nice side-effect, the new version is variable-group-size ready.<br>
<br>
Cc: Plamena Manolova &lt;<a href="mailto:plamena.n.manolova@gmail.com" target="_blank">plamena.n.manolova@gmail.com</a>&gt;<br>
---<br>
src/compiler/nir/nir.h | 1 +<br>
src/compiler/nir/nir_lower_system_values.c | 49 ++++++++++++++++++-<br>
src/intel/compiler/brw_compiler.c | 1 +<br>
.../compiler/brw_nir_lower_cs_intrinsics.c | 33 -------------<br>
4 files changed, 50 insertions(+), 34 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index b0cff50eaf2..1dd605010f6 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options {<br>
bool lower_helper_invocation;<br>
<br>
bool lower_cs_local_index_from_id;<br>
+ bool lower_cs_local_id_from_index;<br>
<br>
bool lower_device_index_to_zero;<br>
<br>
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c<br>
index fbc40573579..08a9e8be44a 100644<br>
--- a/src/compiler/nir/nir_lower_system_values.c<br>
+++ b/src/compiler/nir/nir_lower_system_values.c<br>
@@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b)<br>
return local_size;<br>
}<br>
<br>
+static nir_ssa_def *<br>
+build_local_invocation_id(nir_builder *b)<br>
+{<br>
+ if (b->shader->options->lower_cs_local_id_from_index) {<br>
+ /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based<br>
+ * on this formula:<br>
+ *<br>
+ * gl_LocalInvocationID.x =<br>
+ * gl_LocalInvocationIndex % gl_WorkGroupSize.x;<br>
+ * gl_LocalInvocationID.y =<br>
+ * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %<br>
+ * gl_WorkGroupSize.y;<br>
+ * gl_LocalInvocationID.z =<br>
+ * (gl_LocalInvocationIndex /<br>
+ * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %<br>
+ * gl_WorkGroupSize.z;<br>
+ *<br>
+ * However, the final % gl_WorkGroupSize.z does nothing unless we<br>
+ * accidentally end up with a gl_LocalInvocationIndex that is too<br>
+ * large so it can safely be omitted.<br>
+ */<br>
+ nir_ssa_def *local_index = nir_load_local_invocation_index(b);<br>
+ nir_ssa_def *local_size = build_local_group_size(b);<br>
+<br>
+ nir_ssa_def *id_x, *id_y, *id_z;<br>
+ id_x = nir_umod(b, local_index,<br>
+ nir_channel(b, local_size, 0));<br>
+ id_y = nir_umod(b, nir_udiv(b, local_index,<br>
+ nir_channel(b, local_size, 0)),<br>
+ nir_channel(b, local_size, 1));<br>
+ id_z = nir_udiv(b, local_index,<br>
+ nir_imul(b, nir_channel(b, local_size, 0),<br>
+ nir_channel(b, local_size, 1)));<br>
+ return nir_vec3(b, id_x, id_y, id_z);<br>
+ } else {<br>
+ return nir_load_local_invocation_id(b);<br>
+ }<br>
+}<br>
+<br>
static bool<br>
convert_block(nir_block *block, nir_builder *b)<br>
{<br>
@@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
*/<br>
nir_ssa_def *group_size = build_local_group_size(b);<br>
nir_ssa_def *group_id = nir_load_work_group_id(b);<br>
- nir_ssa_def *local_id = nir_load_local_invocation_id(b);<br>
+ nir_ssa_def *local_id = build_local_invocation_id(b);<br>
<br>
sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);<br>
break;<br>
@@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b)<br>
break;<br>
}<br>
<br>
+ case SYSTEM_VALUE_LOCAL_INVOCATION_ID:<br>
+ /* If lower_cs_local_id_from_index is true, then we derive the local<br>
+ * id from the local index.<br>
+ */<br>
+ if (b->shader->options->lower_cs_local_id_from_index)<br>
+ sysval = build_local_invocation_id(b);<br>
+ break;<br>
+<br>
case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {<br>
sysval = build_local_group_size(b);<br>
break;<br>
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c<br>
index e863b08b991..fe632c5badc 100644<br>
--- a/src/intel/compiler/brw_compiler.c<br>
+++ b/src/intel/compiler/brw_compiler.c<br>
@@ -42,6 +42,7 @@<br>
.lower_fdiv = true, \<br>
.lower_flrp64 = true, \<br>
.lower_ldexp = true, \<br>
+ .lower_cs_local_id_from_index = true, \<br>
.lower_device_index_to_zero = true, \<br>
.native_integers = true, \<br>
.use_interpolated_input_intrinsics = true, \<br>
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
index bfbdea0e8fa..fab5edc893f 100644<br>
--- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
+++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
@@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_local_invocation_id: {<br>
- /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based<br>
- * on this formula:<br>
- *<br>
- * gl_LocalInvocationID.x =<br>
- * gl_LocalInvocationIndex % gl_WorkGroupSize.x;<br>
- * gl_LocalInvocationID.y =<br>
- * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %<br>
- * gl_WorkGroupSize.y;<br>
- * gl_LocalInvocationID.z =<br>
- * (gl_LocalInvocationIndex /<br>
- * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %<br>
- * gl_WorkGroupSize.z;<br>
- */<br>
- unsigned *size = nir->info.cs.local_size;<br>
-<br>
- nir_ssa_def *local_index = nir_load_local_invocation_index(b);<br>
-<br>
- nir_const_value uvec3;<br>
- memset(&uvec3, 0, sizeof(uvec3));<br>
- uvec3.u32[0] = 1;<br>
- uvec3.u32[1] = size[0];<br>
- uvec3.u32[2] = size[0] * size[1];<br>
- nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);<br>
- uvec3.u32[0] = size[0];<br>
- uvec3.u32[1] = size[1];<br>
- uvec3.u32[2] = size[2];<br>
- nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);<br>
-<br>
- sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);<br>
- break;<br>
- }<br>
-<br>
case nir_intrinsic_load_subgroup_id:<br>
if (state->local_workgroup_size > 8)<br>
continue;<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>