[Mesa-dev] [PATCH 11/12] nir: add support for address bit sized system values

Karol Herbst kherbst at redhat.com
Tue Jan 8 19:24:36 UTC 2019


On Mon, Jan 7, 2019 at 6:29 PM Jason Ekstrand <jason at jlekstrand.net> wrote:
>
>
> On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst <kherbst at redhat.com> wrote:
>>
>> Signed-off-by: Karol Herbst <kherbst at redhat.com>
>> ---
>>  src/amd/vulkan/radv_meta_buffer.c          |  8 ++--
>>  src/amd/vulkan/radv_meta_bufimage.c        | 28 +++++++-------
>>  src/amd/vulkan/radv_meta_clear.c           |  4 +-
>>  src/amd/vulkan/radv_meta_fast_clear.c      |  4 +-
>>  src/amd/vulkan/radv_meta_resolve_cs.c      |  4 +-
>>  src/amd/vulkan/radv_query.c                | 12 +++---
>>  src/compiler/nir/nir_intrinsics.py         | 12 +++---
>>  src/compiler/nir/nir_lower_system_values.c | 43 +++++++++++++---------
>>  8 files changed, 61 insertions(+), 54 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
>> index 76854d7bbad..208988c3775 100644
>> --- a/src/amd/vulkan/radv_meta_buffer.c
>> +++ b/src/amd/vulkan/radv_meta_buffer.c
>> @@ -15,8 +15,8 @@ build_buffer_fill_shader(struct radv_device *dev)
>>         b.shader->info.cs.local_size[1] = 1;
>>         b.shader->info.cs.local_size[2] = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -67,8 +67,8 @@ build_buffer_copy_shader(struct radv_device *dev)
>>         b.shader->info.cs.local_size[1] = 1;
>>         b.shader->info.cs.local_size[2] = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
>> index 45df8438234..c8a733b3062 100644
>> --- a/src/amd/vulkan/radv_meta_bufimage.c
>> +++ b/src/amd/vulkan/radv_meta_bufimage.c
>> @@ -60,8 +60,8 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -289,8 +289,8 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -511,8 +511,8 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -719,8 +719,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -932,8 +932,8 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -1139,8 +1139,8 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 0;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -1331,8 +1331,8 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 0;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
>> index 5805d39c4b3..2e1274c929d 100644
>> --- a/src/amd/vulkan/radv_meta_clear.c
>> +++ b/src/amd/vulkan/radv_meta_clear.c
>> @@ -1025,8 +1025,8 @@ build_clear_htile_mask_shader()
>>         b.shader->info.cs.local_size[1] = 1;
>>         b.shader->info.cs.local_size[2] = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
>> index f2f68961e2e..136b596aa4f 100644
>> --- a/src/amd/vulkan/radv_meta_fast_clear.c
>> +++ b/src/amd/vulkan/radv_meta_fast_clear.c
>> @@ -58,8 +58,8 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
>> index e56df7f8a59..1ee8ce32ac0 100644
>> --- a/src/amd/vulkan/radv_meta_resolve_cs.c
>> +++ b/src/amd/vulkan/radv_meta_resolve_cs.c
>> @@ -99,8 +99,8 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
>>                                                        img_type, "out_img");
>>         output_img->data.descriptor_set = 0;
>>         output_img->data.binding = 1;
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
>> index d3baf2357ff..a7d53b938a7 100644
>> --- a/src/amd/vulkan/radv_query.c
>> +++ b/src/amd/vulkan/radv_query.c
>> @@ -153,8 +153,8 @@ build_occlusion_query_shader(struct radv_device *device) {
>>         nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
>>         nir_builder_instr_insert(&b, &src_buf->instr);
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -343,8 +343,8 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
>>         nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
>>         nir_builder_instr_insert(&b, &src_buf->instr);
>>
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> @@ -590,8 +590,8 @@ build_tfb_query_shader(struct radv_device *device)
>>         nir_builder_instr_insert(&b, &src_buf->instr);
>>
>>         /* Compute global ID. */
>> -       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
>> -       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
>> +       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32);
>> +       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
>>         nir_ssa_def *block_size = nir_imm_ivec4(&b,
>>                                                 b.shader->info.cs.local_size[0],
>>                                                 b.shader->info.cs.local_size[1],
>> diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
>> index 830c406b450..746ddd22d92 100644
>> --- a/src/compiler/nir/nir_intrinsics.py
>> +++ b/src/compiler/nir/nir_intrinsics.py
>> @@ -479,11 +479,11 @@ system_value("tess_coord", 3)
>>  system_value("tess_level_outer", 4)
>>  system_value("tess_level_inner", 2)
>>  system_value("patch_vertices_in", 1)
>> -system_value("local_invocation_id", 3)
>> -system_value("local_invocation_index", 1)
>> -system_value("work_group_id", 3)
>> +system_value("local_invocation_id", 3, bit_sizes=[32, 64])
>> +system_value("local_invocation_index", 1, bit_sizes=[32, 64])
>> +system_value("work_group_id", 3, bit_sizes=[32, 64])
>>  system_value("user_clip_plane", 4, indices=[UCP_ID])
>> -system_value("num_work_groups", 3)
>> +system_value("num_work_groups", 3, bit_sizes=[32, 64])
>>  system_value("helper_invocation", 1)
>>  system_value("alpha_ref_float", 1)
>>  system_value("layer_id", 1)
>> @@ -497,8 +497,8 @@ system_value("subgroup_le_mask", 0, bit_sizes=[32, 64])
>>  system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64])
>>  system_value("num_subgroups", 1)
>>  system_value("subgroup_id", 1)
>> -system_value("local_group_size", 3)
>> -system_value("global_invocation_id", 3)
>> +system_value("local_group_size", 3, bit_sizes=[32, 64])
>> +system_value("global_invocation_id", 3, bit_sizes=[32, 64])
>>  system_value("work_dim", 1)
>>
>>  # Blend constant color values.  Float values are clamped.#
>> diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
>> index 68b0ea89c8d..3cb9f224ecd 100644
>> --- a/src/compiler/nir/nir_lower_system_values.c
>> +++ b/src/compiler/nir/nir_lower_system_values.c
>> @@ -29,7 +29,7 @@
>>  #include "nir_builder.h"
>>
>>  static nir_ssa_def*
>> -build_local_group_size(nir_builder *b)
>> +build_local_group_size(nir_builder *b, unsigned bit_size)
>>  {
>>     nir_ssa_def *local_size;
>>
>> @@ -38,21 +38,27 @@ build_local_group_size(nir_builder *b)
>>      * point, but its intrinsic can still be used.
>>      */
>>     if (b->shader->info.cs.local_size_variable) {
>> -      local_size = nir_load_local_group_size(b);
>> +      local_size = nir_load_local_group_size(b, bit_size);
>>     } else {
>>        nir_const_value local_size_const;
>>        memset(&local_size_const, 0, sizeof(local_size_const));
>> -      local_size_const.u32[0] = b->shader->info.cs.local_size[0];
>> -      local_size_const.u32[1] = b->shader->info.cs.local_size[1];
>> -      local_size_const.u32[2] = b->shader->info.cs.local_size[2];
>> -      local_size = nir_build_imm(b, 3, 32, local_size_const);
>> +      if (bit_size == 32) {
>> +         local_size_const.u32[0] = b->shader->info.cs.local_size[0];
>> +         local_size_const.u32[1] = b->shader->info.cs.local_size[1];
>> +         local_size_const.u32[2] = b->shader->info.cs.local_size[2];
>> +      } else {
>
>
> assert(bit_size == 64);
>
>>
>> +         local_size_const.u64[0] = b->shader->info.cs.local_size[0];
>> +         local_size_const.u64[1] = b->shader->info.cs.local_size[1];
>> +         local_size_const.u64[2] = b->shader->info.cs.local_size[2];
>> +      }
>> +      local_size = nir_build_imm(b, 3, bit_size, local_size_const);
>>     }
>>
>>     return local_size;
>>  }
>>
>>  static nir_ssa_def *
>> -build_local_invocation_id(nir_builder *b)
>> +build_local_invocation_id(nir_builder *b, unsigned bit_size)
>>  {
>>     if (b->shader->options->lower_cs_local_id_from_index) {
>>        /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
>> @@ -72,8 +78,8 @@ build_local_invocation_id(nir_builder *b)
>>         * accidentally end up with a gl_LocalInvocationIndex that is too
>>         * large so it can safely be omitted.
>>         */
>> -      nir_ssa_def *local_index = nir_load_local_invocation_index(b);
>> -      nir_ssa_def *local_size = build_local_group_size(b);
>> +      nir_ssa_def *local_index = nir_load_local_invocation_index(b, bit_size);
>> +      nir_ssa_def *local_size = build_local_group_size(b, bit_size);
>
>
> In my iris clover branch, I instead have a line that just does "if (bit_size == 64) id = nir_u2u64(b, id)", where "id" is the final computed 3D ID just before returning it.  This lets us satisfy the CL requirements while still only using 32-bit system values and doing the calculation in 32 bits.
>
>>
>>
>>        nir_ssa_def *id_x, *id_y, *id_z;
>>        id_x = nir_umod(b, local_index,
>> @@ -86,7 +92,7 @@ build_local_invocation_id(nir_builder *b)
>>                                       nir_channel(b, local_size, 1)));
>
>
> In other words, right here we have
>
> nir_ssa_def *id = nir_vec3(b, id_x, id_y, id_z);
>
> if (bit_size == 64)
>    id = nir_u2u64(b, id);
>
> return id;
>
> It's kind of nice to avoid unneeded 64-bit math especially when some of that math is division/modulus. :-)
>

Yeah, I think for the local ID we can assume it fits nicely within 32 bits.

>>
>>        return nir_vec3(b, id_x, id_y, id_z);
>>     } else {
>> -      return nir_load_local_invocation_id(b);
>> +      return nir_load_local_invocation_id(b, bit_size);
>>     }
>>  }
>>
>> @@ -120,6 +126,7 @@ convert_block(nir_block *block, nir_builder *b)
>>
>>        b->cursor = nir_after_instr(&load_deref->instr);
>>
>> +      unsigned bit_size = nir_dest_bit_size(load_deref->dest);
>>        nir_ssa_def *sysval = NULL;
>>        switch (var->data.location) {
>>        case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
>> @@ -128,9 +135,9 @@ convert_block(nir_block *block, nir_builder *b)
>>            *    "The value of gl_GlobalInvocationID is equal to
>>            *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
>>            */
>> -         nir_ssa_def *group_size = build_local_group_size(b);
>> -         nir_ssa_def *group_id = nir_load_work_group_id(b);
>> -         nir_ssa_def *local_id = build_local_invocation_id(b);
>> +         nir_ssa_def *group_size = build_local_group_size(b, bit_size);
>> +         nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
>> +         nir_ssa_def *local_id = build_local_invocation_id(b, bit_size);
>>
>>           sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
>>           break;
>> @@ -150,7 +157,7 @@ convert_block(nir_block *block, nir_builder *b)
>>            *    gl_WorkGroupSize.y + gl_LocalInvocationID.y *
>>            *    gl_WorkGroupSize.x + gl_LocalInvocationID.x"
>>            */
>> -         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
>> +         nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);
>>
>>           nir_ssa_def *size_x =
>>              nir_imm_int(b, b->shader->info.cs.local_size[0]);
>> @@ -170,11 +177,11 @@ convert_block(nir_block *block, nir_builder *b)
>>            * index from the local id.
>>            */
>>           if (b->shader->options->lower_cs_local_id_from_index)
>> -            sysval = build_local_invocation_id(b);
>> +            sysval = build_local_invocation_id(b, bit_size);
>>           break;
>>
>>        case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
>> -         sysval = build_local_group_size(b);
>> +         sysval = build_local_group_size(b, bit_size);
>>           break;
>>        }
>>
>> @@ -248,8 +255,8 @@ convert_block(nir_block *block, nir_builder *b)
>>           break;
>>
>>        case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: {
>> -         nir_ssa_def *group_size = build_local_group_size(b);
>> -         nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);
>> +         nir_ssa_def *group_size = build_local_group_size(b, bit_size);
>> +         nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);
>>           sysval = nir_imul(b, group_size, num_work_groups);
>>           break;
>>        }
>> --
>> 2.19.2
>>


More information about the mesa-dev mailing list