<div dir="ltr"><br><div class="gmail_quote"><div dir="ltr">On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst <<a href="mailto:kherbst@redhat.com">kherbst@redhat.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Signed-off-by: Karol Herbst <<a href="mailto:kherbst@redhat.com" target="_blank">kherbst@redhat.com</a>><br>
---<br>
src/amd/vulkan/radv_meta_buffer.c | 8 ++--<br>
src/amd/vulkan/radv_meta_bufimage.c | 28 +++++++-------<br>
src/amd/vulkan/radv_meta_clear.c | 4 +-<br>
src/amd/vulkan/radv_meta_fast_clear.c | 4 +-<br>
src/amd/vulkan/radv_meta_resolve_cs.c | 4 +-<br>
src/amd/vulkan/radv_query.c | 12 +++---<br>
src/compiler/nir/nir_intrinsics.py | 12 +++---<br>
src/compiler/nir/nir_lower_system_values.c | 43 +++++++++++++---------<br>
8 files changed, 61 insertions(+), 54 deletions(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c<br>
index 76854d7bbad..208988c3775 100644<br>
--- a/src/amd/vulkan/radv_meta_buffer.c<br>
+++ b/src/amd/vulkan/radv_meta_buffer.c<br>
@@ -15,8 +15,8 @@ build_buffer_fill_shader(struct radv_device *dev)<br>
b.shader->info.cs.local_size[1] = 1;<br>
b.shader->info.cs.local_size[2] = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -67,8 +67,8 @@ build_buffer_copy_shader(struct radv_device *dev)<br>
b.shader->info.cs.local_size[1] = 1;<br>
b.shader->info.cs.local_size[2] = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c<br>
index 45df8438234..c8a733b3062 100644<br>
--- a/src/amd/vulkan/radv_meta_bufimage.c<br>
+++ b/src/amd/vulkan/radv_meta_bufimage.c<br>
@@ -60,8 +60,8 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -289,8 +289,8 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -511,8 +511,8 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -719,8 +719,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -932,8 +932,8 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -1139,8 +1139,8 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 0;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -1331,8 +1331,8 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 0;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c<br>
index 5805d39c4b3..2e1274c929d 100644<br>
--- a/src/amd/vulkan/radv_meta_clear.c<br>
+++ b/src/amd/vulkan/radv_meta_clear.c<br>
@@ -1025,8 +1025,8 @@ build_clear_htile_mask_shader()<br>
b.shader->info.cs.local_size[1] = 1;<br>
b.shader->info.cs.local_size[2] = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c<br>
index f2f68961e2e..136b596aa4f 100644<br>
--- a/src/amd/vulkan/radv_meta_fast_clear.c<br>
+++ b/src/amd/vulkan/radv_meta_fast_clear.c<br>
@@ -58,8 +58,8 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c<br>
index e56df7f8a59..1ee8ce32ac0 100644<br>
--- a/src/amd/vulkan/radv_meta_resolve_cs.c<br>
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c<br>
@@ -99,8 +99,8 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s<br>
img_type, "out_img");<br>
output_img->data.descriptor_set = 0;<br>
output_img->data.binding = 1;<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c<br>
index d3baf2357ff..a7d53b938a7 100644<br>
--- a/src/amd/vulkan/radv_query.c<br>
+++ b/src/amd/vulkan/radv_query.c<br>
@@ -153,8 +153,8 @@ build_occlusion_query_shader(struct radv_device *device) {<br>
nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);<br>
nir_builder_instr_insert(&b, &src_buf->instr);<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -343,8 +343,8 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {<br>
nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);<br>
nir_builder_instr_insert(&b, &src_buf->instr);<br>
<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
@@ -590,8 +590,8 @@ build_tfb_query_shader(struct radv_device *device)<br>
nir_builder_instr_insert(&b, &src_buf->instr);<br>
<br>
/* Compute global ID. */<br>
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);<br>
- nir_ssa_def *wg_id = nir_load_work_group_id(&b);<br>
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);<br>
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);<br>
nir_ssa_def *block_size = nir_imm_ivec4(&b,<br>
b.shader->info.cs.local_size[0],<br>
b.shader->info.cs.local_size[1],<br>
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py<br>
index 830c406b450..746ddd22d92 100644<br>
--- a/src/compiler/nir/nir_intrinsics.py<br>
+++ b/src/compiler/nir/nir_intrinsics.py<br>
@@ -479,11 +479,11 @@ system_value("tess_coord", 3)<br>
system_value("tess_level_outer", 4)<br>
system_value("tess_level_inner", 2)<br>
system_value("patch_vertices_in", 1)<br>
-system_value("local_invocation_id", 3)<br>
-system_value("local_invocation_index", 1)<br>
-system_value("work_group_id", 3)<br>
+system_value("local_invocation_id", 3, bit_sizes=[32, 64])<br>
+system_value("local_invocation_index", 1, bit_sizes=[32, 64])<br>
+system_value("work_group_id", 3, bit_sizes=[32, 64])<br>
system_value("user_clip_plane", 4, indices=[UCP_ID])<br>
-system_value("num_work_groups", 3)<br>
+system_value("num_work_groups", 3, bit_sizes=[32, 64])<br>
system_value("helper_invocation", 1)<br>
system_value("alpha_ref_float", 1)<br>
system_value("layer_id", 1)<br>
@@ -497,8 +497,8 @@ system_value("subgroup_le_mask", 0, bit_sizes=[32, 64])<br>
system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64])<br>
system_value("num_subgroups", 1)<br>
system_value("subgroup_id", 1)<br>
-system_value("local_group_size", 3)<br>
-system_value("global_invocation_id", 3)<br>
+system_value("local_group_size", 3, bit_sizes=[32, 64])<br>
+system_value("global_invocation_id", 3, bit_sizes=[32, 64])<br>
system_value("work_dim", 1)<br>
<br>
# Blend constant color values. Float values are clamped.#<br>
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c<br>
index 68b0ea89c8d..3cb9f224ecd 100644<br>
--- a/src/compiler/nir/nir_lower_system_values.c<br>
+++ b/src/compiler/nir/nir_lower_system_values.c<br>
@@ -29,7 +29,7 @@<br>
#include "nir_builder.h"<br>
<br>
static nir_ssa_def*<br>
-build_local_group_size(nir_builder *b)<br>
+build_local_group_size(nir_builder *b, unsigned bit_size)<br>
{<br>
nir_ssa_def *local_size;<br>
<br>
@@ -38,21 +38,27 @@ build_local_group_size(nir_builder *b)<br>
* point, but its intrinsic can still be used.<br>
*/<br>
if (b->shader->info.cs.local_size_variable) {<br>
- local_size = nir_load_local_group_size(b);<br>
+ local_size = nir_load_local_group_size(b, bit_size);<br>
} else {<br>
nir_const_value local_size_const;<br>
memset(&local_size_const, 0, sizeof(local_size_const));<br>
- local_size_const.u32[0] = b->shader->info.cs.local_size[0];<br>
- local_size_const.u32[1] = b->shader->info.cs.local_size[1];<br>
- local_size_const.u32[2] = b->shader->info.cs.local_size[2];<br>
- local_size = nir_build_imm(b, 3, 32, local_size_const);<br>
+ if (bit_size == 32) {<br>
+ local_size_const.u32[0] = b->shader->info.cs.local_size[0];<br>
+ local_size_const.u32[1] = b->shader->info.cs.local_size[1];<br>
+ local_size_const.u32[2] = b->shader->info.cs.local_size[2];<br>
+ } else {<br></blockquote><div><br></div><div>assert(bit_size == 64);<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ local_size_const.u64[0] = b->shader->info.cs.local_size[0];<br>
+ local_size_const.u64[1] = b->shader->info.cs.local_size[1];<br>
+ local_size_const.u64[2] = b->shader->info.cs.local_size[2];<br>
+ }<br>
+ local_size = nir_build_imm(b, 3, bit_size, local_size_const);<br>
}<br>
<br>
return local_size;<br>
}<br>
<br>
static nir_ssa_def *<br>
-build_local_invocation_id(nir_builder *b)<br>
+build_local_invocation_id(nir_builder *b, unsigned bit_size)<br>
{<br>
if (b->shader->options->lower_cs_local_id_from_index) {<br>
/* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based<br>
@@ -72,8 +78,8 @@ build_local_invocation_id(nir_builder *b)<br>
* accidentally end up with a gl_LocalInvocationIndex that is too<br>
* large so it can safely be omitted.<br>
*/<br>
- nir_ssa_def *local_index = nir_load_local_invocation_index(b);<br>
- nir_ssa_def *local_size = build_local_group_size(b);<br>
+ nir_ssa_def *local_index = nir_load_local_invocation_index(b, bit_size);<br>
+ nir_ssa_def *local_size = build_local_group_size(b, bit_size);<br></blockquote><div><br></div><div>In my iris clover branch, I instead have a line at the end that just does "if (bit_size == 64) id = nir_u2u64(b, id)" where "id" is the final computed 3D ID just before returning it. This lets us satisfy the CL requirements while still only using 32-bit system values and doing the calculation in 32 bits.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
nir_ssa_def *id_x, *id_y, *id_z;<br>
id_x = nir_umod(b, local_index,<br>
@@ -86,7 +92,7 @@ build_local_invocation_id(nir_builder *b)<br>
nir_channel(b, local_size, 1)));<br></blockquote><div><br></div><div>In other words, right here we have</div><div><br></div><div>nir_ssa_def *id = nir_vec3(b, id_x, id_y, id_z);</div><div><br></div><div>if (bit_size == 64)</div><div> id = nir_u2u64(b, id);</div><div><br></div><div>return id;</div><div><br></div><div>It's kind of nice to avoid unneeded 64-bit math especially when some of that math is division/modulus. :-)<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
return nir_vec3(b, id_x, id_y, id_z);<br>
} else {<br>
- return nir_load_local_invocation_id(b);<br>
+ return nir_load_local_invocation_id(b, bit_size);<br>
}<br>
}<br>
<br>
@@ -120,6 +126,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
<br>
b->cursor = nir_after_instr(&load_deref->instr);<br>
<br>
+ unsigned bit_size = nir_dest_bit_size(load_deref->dest);<br>
nir_ssa_def *sysval = NULL;<br>
switch (var->data.location) {<br>
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {<br>
@@ -128,9 +135,9 @@ convert_block(nir_block *block, nir_builder *b)<br>
* "The value of gl_GlobalInvocationID is equal to<br>
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"<br>
*/<br>
- nir_ssa_def *group_size = build_local_group_size(b);<br>
- nir_ssa_def *group_id = nir_load_work_group_id(b);<br>
- nir_ssa_def *local_id = build_local_invocation_id(b);<br>
+ nir_ssa_def *group_size = build_local_group_size(b, bit_size);<br>
+ nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);<br>
+ nir_ssa_def *local_id = build_local_invocation_id(b, bit_size);<br>
<br>
sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);<br>
break;<br>
@@ -150,7 +157,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
* gl_WorkGroupSize.y + gl_LocalInvocationID.y *<br>
* gl_WorkGroupSize.x + gl_LocalInvocationID.x"<br>
*/<br>
- nir_ssa_def *local_id = nir_load_local_invocation_id(b);<br>
+ nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);<br>
<br>
nir_ssa_def *size_x =<br>
nir_imm_int(b, b->shader->info.cs.local_size[0]);<br>
@@ -170,11 +177,11 @@ convert_block(nir_block *block, nir_builder *b)<br>
* index from the local id.<br>
*/<br>
if (b->shader->options->lower_cs_local_id_from_index)<br>
- sysval = build_local_invocation_id(b);<br>
+ sysval = build_local_invocation_id(b, bit_size);<br>
break;<br>
<br>
case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {<br>
- sysval = build_local_group_size(b);<br>
+ sysval = build_local_group_size(b, bit_size);<br>
break;<br>
}<br>
<br>
@@ -248,8 +255,8 @@ convert_block(nir_block *block, nir_builder *b)<br>
break;<br>
<br>
case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: {<br>
- nir_ssa_def *group_size = build_local_group_size(b);<br>
- nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);<br>
+ nir_ssa_def *group_size = build_local_group_size(b, bit_size);<br>
+ nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);<br>
sysval = nir_imul(b, group_size, num_work_groups);<br>
break;<br>
}<br>
-- <br>
2.19.2<br>
<br>
</blockquote></div></div>